The script is inent to build a baseline model for creditcard fraud detection.

In [23]:
import os,datetime
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_curve, auc, average_precision_score,f1_score,fbeta_score,precision_recall_curve
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import cross_val_score,cross_validate,train_test_split,StratifiedKFold

Load data and engineer features as discussed in explanatory analysis

In [13]:
creditcard=pd.read_csv('creditcard.csv')
In [14]:
df2=creditcard
df2['Hour']=df2['Time'].apply(lambda x : datetime.datetime.fromtimestamp(x).hour)
df2['Day']=df2['Time'].apply(lambda x : 0 if datetime.datetime.fromtimestamp(x).day==31 else 1)
df2['Amount_A']=np.log10(creditcard['Amount']+1)
df2['V1_A']=np.log10(-creditcard['V1']+3)

Create one-hot vectors for Hour.

In [15]:
Hour= pd.get_dummies(df2['Hour'],prefix='Hour')
df2 = pd.concat([df2,Hour],axis=1)
df2 = df2.drop(["Hour"], axis=1)

remove=["Time","V1","V6","V8","V13","V15","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","Amount","Amount_A"]
df2= df2.drop(remove, axis =1)
In [16]:
df2.describe
Out[16]:
<bound method NDFrame.describe of                V2        V3        V4        V5        V7        V9       V10  \
0       -0.072781  2.536347  1.378155 -0.338321  0.239599  0.363787  0.090794   
1        0.266151  0.166480  0.448154  0.060018 -0.078803 -0.255425 -0.166974   
2       -1.340163  1.773209  0.379780 -0.503198  0.791461 -1.514654  0.207643   
3       -0.185226  1.792993 -0.863291 -0.010309  0.237609 -1.387024 -0.054952   
4        0.877737  1.548718  0.403034 -0.407193  0.592941  0.817739  0.753074   
5        0.960523  1.141109 -0.168252  0.420987  0.476201 -0.568671 -0.371407   
6        0.141004  0.045371  1.202613  0.191881 -0.005159  0.464960 -0.099254   
7        1.417964  1.074380 -0.492199  0.948934  1.120631  0.615375  1.249376   
8        0.286157 -0.113192 -0.271526  2.669599  0.370145 -0.392048 -0.410430   
9        1.119593  1.044367 -0.222187  0.499361  0.651583 -0.736727 -0.366846   
10      -1.176339  0.913860 -1.375667 -1.971383 -1.423236 -1.720408  1.626659   
11       0.616109 -0.874300 -0.094019  2.924584  0.470455 -0.558895  0.309755   
12      -1.221637  0.383930 -1.234899 -1.485419 -0.689405 -2.094011  1.323729   
13       0.287722  0.828613  2.712520 -0.178398 -0.096717 -0.221083  0.460230   
14      -0.327771  1.641750  1.767473 -0.136588 -0.422911  0.755713  1.151087   
15       0.345485  2.057323 -1.468643 -1.158394 -0.608581 -0.436167  0.747731   
16      -0.040296  1.267332  1.289091 -0.735997 -0.586057  0.782333 -0.267975   
17       0.918966  0.924591 -0.727219  0.915679  0.707642 -0.665271 -0.737980   
18      -5.450148  1.186305  1.736239  3.049106 -1.559738  1.233090  0.345173   
19      -1.029346  0.454795 -1.438026 -1.555434 -1.080664 -1.978682  1.638076   
20      -1.361819  1.029221  0.834159 -1.191209 -0.878586 -0.446196  0.568521   
21       0.328461 -0.171479  2.109204  1.129566  0.107712 -1.191311  0.724396   
22       0.502120 -0.067300  2.261569  0.428804  0.241147 -0.989162  0.922175   
23       0.277666  1.185471 -0.092603 -1.314394 -0.946365  1.544071 -0.829881   
24      -0.044901 -0.405570 -1.013057  2.941968 -0.063063  0.049967  0.573743   
25      -0.121482  1.322021  0.410008  0.295198  0.543985  0.475664  0.149451   
26       0.353498  0.283905  1.133563 -0.172577  0.369025 -0.246651 -0.046139   
27      -0.174041  0.434555  0.576038 -0.836758 -0.264905 -1.071425  0.868559   
28       0.905437  1.727453  1.473471  0.007443  0.740228 -0.593392 -0.346188   
29      -0.175319  1.266130  1.186110 -0.786002 -0.767084  0.699500 -0.064738   
...           ...       ...       ...       ...       ...       ...       ...   
284777  -0.028723 -1.343392  0.358000 -0.045791  0.227476  0.665911  0.028351   
284778   0.588379 -0.907599 -0.418847  0.901528  0.758545 -0.730854 -1.245088   
284779  -0.616244 -2.628295 -0.406246  2.327804 -0.533297  1.128798 -0.220744   
284780   1.108356  2.219561  1.148583 -0.884199 -0.527298  0.853819 -0.195152   
284781  -0.155510  1.894478 -1.138957  1.451777  0.191353 -0.062621 -0.792066   
284782   2.540315 -1.400915  4.846661  0.639105 -0.045911 -2.419986  0.525012   
284783   1.929178 -2.828417 -1.689844  2.199572 -0.270714  0.465804  0.832931   
284784   0.923769 -1.543167 -1.560729  2.833960  0.181576 -0.893890 -1.453432   
284785   0.545338 -1.185844 -1.729828  2.932315  0.337434 -0.165663 -0.386953   
284786   2.142238 -2.522704 -1.888063  1.982785 -1.217430  0.272867  0.300342   
284787  -1.110670 -0.636179 -0.840816  2.424360  0.283610 -0.247488 -0.328271   
284788  -0.280235 -0.208113  0.335261 -0.715798 -0.458972  0.959971 -0.028284   
284789   1.302212 -0.168583  0.981577  0.578957  1.253430 -0.417116  0.076605   
284790   0.971950 -1.014580 -0.677037  0.912430  0.396137 -0.224606 -0.753365   
284791   0.874701  0.420358 -0.530365  0.356561  0.757051 -0.506856 -1.032990   
284792   1.485216 -1.132218 -0.607190  0.709499  0.548393 -0.226323 -0.348134   
284793  -0.699067 -1.697541 -0.617643  1.718797 -1.259306  1.315006 -0.146827   
284794  -0.400461  0.956221 -0.723919  1.531993  0.314741  0.013857 -0.815911   
284795  10.187818 -8.476671 -2.510473 -4.586669 -3.632516  4.893089  8.655320   
284796  -0.143540 -0.999943  1.506772 -0.035300  0.190241  0.666458  0.120908   
284797   0.712247  0.399806 -0.463406  0.244531  0.929369  0.106234 -0.284708   
284798   0.881246 -0.635891  0.960928 -0.152971  0.427126 -0.285670 -0.111640   
284799  -0.004235  1.189786  0.331096  1.196063 -1.518185  1.159498 -0.594242   
284800  -0.175233 -1.196825  0.234580 -0.008713  0.017050  0.435402  0.267772   
284801   0.931005 -0.546012 -0.745097  1.130314  0.812722 -0.204064 -0.657422   
284802  10.071785 -9.834783 -2.066656 -5.364473 -4.918215  1.914428  4.356170   
284803  -0.055080  2.035030 -0.738589  0.868229  0.024330  0.584800 -0.975926   
284804  -0.301254 -3.249640 -0.557828  2.630515 -0.296827  0.432454 -0.484782   
284805   0.530483  0.702510  0.689799 -0.377961 -0.686180  0.392087 -0.399126   
284806  -0.189733  0.703337 -0.506271 -0.012546  1.577006  0.486180 -0.915427   

             V11       V12       V14   ...     Hour_14  Hour_15  Hour_16  \
0      -0.551600 -0.617801 -0.311169   ...         0.0      0.0      0.0   
1       1.612727  1.065235 -0.143772   ...         0.0      0.0      0.0   
2       0.624501  0.066084 -0.165946   ...         0.0      0.0      0.0   
3      -0.226487  0.178228 -0.287924   ...         0.0      0.0      0.0   
4      -0.822843  0.538196 -1.119670   ...         0.0      0.0      0.0   
5       1.341262  0.359894 -0.137134   ...         0.0      0.0      0.0   
6      -1.416907 -0.153826  0.167372   ...         0.0      0.0      0.0   
7      -0.619468  0.291474 -1.323865   ...         0.0      0.0      0.0   
8      -0.705117 -0.110452  0.074355   ...         0.0      0.0      0.0   
9       1.017614  0.836390 -0.443523   ...         0.0      0.0      0.0   
10      1.199644 -0.671440 -0.095045   ...         0.0      0.0      0.0   
11     -0.259116 -0.326143  0.362832   ...         0.0      0.0      0.0   
12      0.227666 -0.242682 -0.317631   ...         0.0      0.0      0.0   
13     -0.773657  0.323387 -0.178485   ...         0.0      0.0      0.0   
14      0.844555  0.792944 -0.734975   ...         0.0      0.0      0.0   
15     -0.793981 -0.770407 -1.066604   ...         0.0      0.0      0.0   
16     -0.450311  0.936708 -0.468647   ...         0.0      0.0      0.0   
17      0.324098  0.277192 -0.291896   ...         0.0      0.0      0.0   
18      0.917230  0.970117 -0.479130   ...         0.0      0.0      0.0   
19      1.077542 -0.632047  0.052011   ...         0.0      0.0      0.0   
20      1.019151  1.298329 -0.372651   ...         0.0      0.0      0.0   
21      1.690330  0.406774  0.983739   ...         0.0      0.0      0.0   
22      0.744786 -0.531377  1.126870   ...         0.0      0.0      0.0   
23     -0.583200  0.524933  0.081393   ...         0.0      0.0      0.0   
24     -0.081257 -0.215745  0.033898   ...         0.0      0.0      0.0   
25     -0.856566 -0.180523 -0.279797   ...         0.0      0.0      0.0   
26     -0.143419  0.979350  0.101418   ...         0.0      0.0      0.0   
27     -0.641506 -0.111316  0.171945   ...         0.0      0.0      0.0   
28     -0.012142  0.786796 -0.086324   ...         0.0      0.0      0.0   
29      1.048292  1.005618 -0.039915   ...         0.0      0.0      0.0   
...          ...       ...       ...   ...         ...      ...      ...   
284777 -0.822911  0.049716  0.569087   ...         0.0      0.0      0.0   
284778  0.874312  0.162691 -0.317789   ...         0.0      0.0      0.0   
284779 -0.590756  0.654596  0.120626   ...         0.0      0.0      0.0   
284780 -1.296770  0.725295 -0.821823   ...         0.0      0.0      0.0   
284781  0.659941  0.995402 -0.300746   ...         0.0      0.0      0.0   
284782  1.045386  0.124585 -1.820716   ...         0.0      0.0      0.0   
284783 -0.344690  0.247153  0.849765   ...         0.0      0.0      0.0   
284784  0.187488 -0.390794 -0.510320   ...         0.0      0.0      0.0   
284785 -0.199626  0.032017  0.354051   ...         0.0      0.0      0.0   
284786 -0.451656  0.566368  0.855742   ...         0.0      0.0      0.0   
284787 -1.089397 -0.694904  0.946281   ...         0.0      0.0      0.0   
284788 -0.635200  0.869261 -0.280317   ...         0.0      0.0      0.0   
284789 -1.291228 -0.690868  0.753473   ...         0.0      0.0      0.0   
284790  0.362990  0.110499 -0.383112   ...         0.0      0.0      0.0   
284791 -1.187546  0.055871  0.550972   ...         0.0      0.0      0.0   
284792 -1.381624  0.617933  0.494490   ...         0.0      0.0      0.0   
284793 -0.222959  0.496509 -0.000744   ...         0.0      0.0      0.0   
284794 -1.311976 -0.946753  0.609469   ...         0.0      0.0      0.0   
284795 -1.052365  2.834865  1.288401   ...         0.0      0.0      0.0   
284796 -1.134176  0.677729  0.002019   ...         0.0      0.0      0.0   
284797 -0.612982 -0.066655  0.237948   ...         0.0      0.0      0.0   
284798 -1.109232 -0.453235  1.122674   ...         0.0      0.0      0.0   
284799 -1.264072  0.453596 -0.858719   ...         0.0      0.0      0.0   
284800  0.523316  0.559047  0.626211   ...         0.0      0.0      0.0   
284801  0.644837  0.190916 -0.731707   ...         0.0      0.0      0.0   
284802 -1.593105  2.711941  4.626942   ...         0.0      0.0      0.0   
284803 -0.150189  0.915802 -0.675143   ...         0.0      0.0      0.0   
284804  0.411614  0.063119 -0.510602   ...         0.0      0.0      0.0   
284805 -1.933849 -0.962886  0.449624   ...         0.0      0.0      0.0   
284806 -1.040458 -0.031513 -0.084316   ...         0.0      0.0      0.0   

        Hour_17  Hour_18  Hour_19  Hour_20  Hour_21  Hour_22  Hour_23  
0           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
1           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
2           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
3           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
4           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
5           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
6           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
7           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
8           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
9           0.0      0.0      1.0      0.0      0.0      0.0      0.0  
10          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
11          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
12          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
13          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
14          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
15          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
16          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
17          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
18          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
19          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
20          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
21          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
22          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
23          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
24          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
25          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
26          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
27          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
28          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
29          0.0      0.0      1.0      0.0      0.0      0.0      0.0  
...         ...      ...      ...      ...      ...      ...      ...  
284777      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284778      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284779      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284780      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284781      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284782      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284783      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284784      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284785      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284786      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284787      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284788      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284789      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284790      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284791      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284792      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284793      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284794      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284795      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284796      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284797      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284798      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284799      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284800      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284801      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284802      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284803      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284804      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284805      0.0      1.0      0.0      0.0      0.0      0.0      0.0  
284806      0.0      1.0      0.0      0.0      0.0      0.0      0.0  

[284807 rows x 40 columns]>
In [17]:
X=df2.iloc[:,df2.columns != 'Class'].as_matrix()
y=df2['Class'].as_matrix()

Split dataset into training set and test set.

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42,stratify=y)

Define a neural network classifier.

In [19]:
layers=(100,100,100,100)
learning_rate_init=0.001
alpha=0.0001

#clf = MLPClassifier(solver='adam',activation='tanh',hidden_layer_sizes=layers, learning_rate='adaptive',learning_rate_init=lr_init,alpha=alpha,random_state=2)
clf = MLPClassifier(solver='adam',activation='tanh',
                    hidden_layer_sizes=layers, learning_rate='adaptive',
                    learning_rate_init=learning_rate_init,alpha=alpha,random_state=2345,
                    max_iter=200,verbose=10)


scaler = StandardScaler() 
scaler.fit(X_train) 
X_train_scaled = scaler.transform(X_train)  
X_test_scaled = scaler.transform(X_test)

Scoring the model using 5-fold cross_validation.

In [20]:
scorings ='average_precision'
fold=5
cv=StratifiedKFold(n_splits=fold,random_state=1234)

scores=cross_validate(clf,X_train_scaled,y_train,cv=cv,scoring=scorings,return_train_score=True)
print(scores)
Iteration 1, loss = 0.00726115
Iteration 2, loss = 0.00304983
Iteration 3, loss = 0.00293634
Iteration 4, loss = 0.00291627
Iteration 5, loss = 0.00273234
Iteration 6, loss = 0.00261874
Iteration 7, loss = 0.00243313
Iteration 8, loss = 0.00232922
Iteration 9, loss = 0.00216394
Iteration 10, loss = 0.00210733
Iteration 11, loss = 0.00186784
Iteration 12, loss = 0.00200001
Iteration 13, loss = 0.00182485
Iteration 14, loss = 0.00155822
Iteration 15, loss = 0.00164987
Iteration 16, loss = 0.00151635
Iteration 17, loss = 0.00147565
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Iteration 1, loss = 0.00733798
Iteration 2, loss = 0.00339057
Iteration 3, loss = 0.00321147
Iteration 4, loss = 0.00302644
Iteration 5, loss = 0.00295946
Iteration 6, loss = 0.00271361
Iteration 7, loss = 0.00259631
Iteration 8, loss = 0.00259599
Iteration 9, loss = 0.00246507
Iteration 10, loss = 0.00227704
Iteration 11, loss = 0.00229308
Iteration 12, loss = 0.00208181
Iteration 13, loss = 0.00192310
Iteration 14, loss = 0.00186207
Iteration 15, loss = 0.00182030
Iteration 16, loss = 0.00170627
Iteration 17, loss = 0.00159485
Iteration 18, loss = 0.00156173
Iteration 19, loss = 0.00142723
Iteration 20, loss = 0.00134535
Iteration 21, loss = 0.00136923
Iteration 22, loss = 0.00116737
Iteration 23, loss = 0.00115478
Iteration 24, loss = 0.00116354
Iteration 25, loss = 0.00136628
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Iteration 1, loss = 0.00724419
Iteration 2, loss = 0.00316202
Iteration 3, loss = 0.00302693
Iteration 4, loss = 0.00290580
Iteration 5, loss = 0.00273328
Iteration 6, loss = 0.00247938
Iteration 7, loss = 0.00244217
Iteration 8, loss = 0.00224629
Iteration 9, loss = 0.00218765
Iteration 10, loss = 0.00209220
Iteration 11, loss = 0.00205630
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Iteration 1, loss = 0.00739033
Iteration 2, loss = 0.00328529
Iteration 3, loss = 0.00317451
Iteration 4, loss = 0.00303713
Iteration 5, loss = 0.00281270
Iteration 6, loss = 0.00271856
Iteration 7, loss = 0.00261253
Iteration 8, loss = 0.00251847
Iteration 9, loss = 0.00239738
Iteration 10, loss = 0.00226837
Iteration 11, loss = 0.00220242
Iteration 12, loss = 0.00197301
Iteration 13, loss = 0.00200823
Iteration 14, loss = 0.00186452
Iteration 15, loss = 0.00195911
Iteration 16, loss = 0.00174355
Iteration 17, loss = 0.00176357
Iteration 18, loss = 0.00147805
Iteration 19, loss = 0.00156854
Iteration 20, loss = 0.00150401
Iteration 21, loss = 0.00141505
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Iteration 1, loss = 0.00745688
Iteration 2, loss = 0.00331136
Iteration 3, loss = 0.00315537
Iteration 4, loss = 0.00312301
Iteration 5, loss = 0.00285771
Iteration 6, loss = 0.00274737
Iteration 7, loss = 0.00274328
Iteration 8, loss = 0.00252623
Iteration 9, loss = 0.00236932
Iteration 10, loss = 0.00237577
Iteration 11, loss = 0.00212594
Iteration 12, loss = 0.00213281
Iteration 13, loss = 0.00197575
Iteration 14, loss = 0.00195648
Iteration 15, loss = 0.00188137
Iteration 16, loss = 0.00160911
Iteration 17, loss = 0.00164596
Iteration 18, loss = 0.00147036
Iteration 19, loss = 0.00146514
Iteration 20, loss = 0.00136645
Iteration 21, loss = 0.00143989
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
{'train_score': array([ 0.94115178,  0.98211107,  0.92915612,  0.94428042,  0.93705265]), 'fit_time': array([ 131.43800521,  188.31551504,   77.56366301,  152.02440691,
        151.83207178]), 'test_score': array([ 0.79095641,  0.83260128,  0.7414279 ,  0.80886489,  0.81880376]), 'score_time': array([ 1.14719057,  0.91705251,  0.93423152,  0.91129065,  0.94997406])}
In [21]:
np.mean(scores['test_score'])
Out[21]:
0.79853084792878481

Precision-recall Curve for 5-fold cross validation.

In [24]:
def plot_precision_recall(X,y,cv):
    aps=[]
    f1scores=[]
    f2scores=[]
    best_thresholds_f1=[]
    best_thresholds_f2=[]
    i = 0
    for train, test in cv.split(X, y):
        scaler = StandardScaler() 
        scaler.fit(X[train]) 
        X[train] = scaler.transform(X[train])  
        X[test] = scaler.transform(X[test])  
        
        probas_ = clf.fit(X[train], y[train]).predict_proba(X[test])[:, 1]
        # Compute precision recall curve and area the curve
        precision, recall, thresholds = precision_recall_curve(y[test], probas_)
        average_precision = average_precision_score(y[test], probas_)
       # auc = metrics.auc(recall, precision)
        
        
       
       
        plt.step(recall, precision,lw=1, alpha=0.3,
                 label=r'f%d AP=%0.2f' % (i,average_precision))
        aps.append(average_precision)
        
    

        
        
        f1score=2*precision*recall/(precision+recall)  
        f2score=5*precision*recall/(4*precision+recall) 

        f1scores.append(np.amax(f1score))
        f2scores.append(np.amax(f2score))
        best_thresholds_f1.append(thresholds[np.argmax(f1score)])
        best_thresholds_f2.append(thresholds[np.argmax(f2score)])    
         
                
         
#        print("Fold=%d max F1 score" % i)
#        print(metrics.classification_report(y[test],proba_temp))
#        print(metrics.confusion_matrix(y[test],proba_temp))
        
        
#        print("Fold=%d max F2 score" % i)
#        print(metrics.classification_report(y[test],proba_temp))
#        print(metrics.confusion_matrix(y[test],proba_temp))   
        
        i += 1
        
        
    plt.plot([0, 3], [0, 3], linestyle='--', lw=2, color='r',
             label='Luck', alpha=.8)
    
    mean_aps = np.mean(aps, axis=0)
    std_aps= np.std(aps)
    #mean_tpr[-1] = 1.0
    #mean_auc = auc(mean_fpr, mean_tpr)
    
    #plt.plot(mean_fpr, mean_tpr, color='b',
    #         label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
    #         lw=2, alpha=.8)
    
    #std_tpr = np.std(tprs, axis=0)
    #tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    #tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    #plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
    #                 label=r'$\pm$ 1 std. dev.')
    
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall-Curve mean %0.2f $\pm$ %0.2f)' % (mean_aps, std_aps))
    plt.legend(loc="lower left")
    plt.show()
    return  f1scores,f2scores,best_thresholds_f1,best_thresholds_f2

f1scores,f2scores,best_thresholds_f1,best_thresholds_f2=plot_precision_recall(X_train,y_train,cv)
Iteration 1, loss = 0.00726258
Iteration 2, loss = 0.00304964
Iteration 3, loss = 0.00292896
Iteration 4, loss = 0.00291889
Iteration 5, loss = 0.00273388
Iteration 6, loss = 0.00260448
Iteration 7, loss = 0.00245107
Iteration 8, loss = 0.00233255
Iteration 9, loss = 0.00212976
Iteration 10, loss = 0.00219152
Iteration 11, loss = 0.00188749
Iteration 12, loss = 0.00190180
Iteration 13, loss = 0.00183321
Iteration 14, loss = 0.00180111
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Iteration 1, loss = 0.00733991
Iteration 2, loss = 0.00338997
Iteration 3, loss = 0.00321430
Iteration 4, loss = 0.00302447
Iteration 5, loss = 0.00295616
Iteration 6, loss = 0.00270583
Iteration 7, loss = 0.00264583
Iteration 8, loss = 0.00255002
Iteration 9, loss = 0.00233396
Iteration 10, loss = 0.00240454
Iteration 11, loss = 0.00213634
Iteration 12, loss = 0.00202146
Iteration 13, loss = 0.00202807
Iteration 14, loss = 0.00187503
Iteration 15, loss = 0.00185142
Iteration 16, loss = 0.00160475
Iteration 17, loss = 0.00174571
Iteration 18, loss = 0.00153419
Iteration 19, loss = 0.00143103
Iteration 20, loss = 0.00157470
Iteration 21, loss = 0.00130756
Iteration 22, loss = 0.00121381
Iteration 23, loss = 0.00119682
Iteration 24, loss = 0.00126240
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Iteration 1, loss = 0.00724146
Iteration 2, loss = 0.00316295
Iteration 3, loss = 0.00302700
Iteration 4, loss = 0.00291093
Iteration 5, loss = 0.00273283
Iteration 6, loss = 0.00248810
Iteration 7, loss = 0.00245658
Iteration 8, loss = 0.00227491
Iteration 9, loss = 0.00216650
Iteration 10, loss = 0.00205845
Iteration 11, loss = 0.00205530
Iteration 12, loss = 0.00185121
Iteration 13, loss = 0.00169241
Iteration 14, loss = 0.00183381
Iteration 15, loss = 0.00174781
Iteration 16, loss = 0.00152817
Iteration 17, loss = 0.00149591
Iteration 18, loss = 0.00150770
Iteration 19, loss = 0.00126423
Iteration 20, loss = 0.00143056
Iteration 21, loss = 0.00125706
Iteration 22, loss = 0.00112216
Iteration 23, loss = 0.00115189
Iteration 24, loss = 0.00097189
Iteration 25, loss = 0.00111505
Iteration 26, loss = 0.00102311
Iteration 27, loss = 0.00103195
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Iteration 1, loss = 0.00738906
Iteration 2, loss = 0.00328480
Iteration 3, loss = 0.00317426
Iteration 4, loss = 0.00303619
Iteration 5, loss = 0.00281379
Iteration 6, loss = 0.00272499
Iteration 7, loss = 0.00265344
Iteration 8, loss = 0.00253094
Iteration 9, loss = 0.00237007
Iteration 10, loss = 0.00221528
Iteration 11, loss = 0.00214489
Iteration 12, loss = 0.00203214
Iteration 13, loss = 0.00198447
Iteration 14, loss = 0.00182123
Iteration 15, loss = 0.00189858
Iteration 16, loss = 0.00172945
Iteration 17, loss = 0.00171544
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Iteration 1, loss = 0.00745657
Iteration 2, loss = 0.00331172
Iteration 3, loss = 0.00315665
Iteration 4, loss = 0.00312828
Iteration 5, loss = 0.00285299
Iteration 6, loss = 0.00274612
Iteration 7, loss = 0.00275780
Iteration 8, loss = 0.00257804
Iteration 9, loss = 0.00234629
Iteration 10, loss = 0.00228616
Iteration 11, loss = 0.00214001
Iteration 12, loss = 0.00219945
Iteration 13, loss = 0.00208816
Iteration 14, loss = 0.00192299
Iteration 15, loss = 0.00179910
Iteration 16, loss = 0.00163550
Iteration 17, loss = 0.00169802
Iteration 18, loss = 0.00152122
Iteration 19, loss = 0.00148279
Iteration 20, loss = 0.00132545
Iteration 21, loss = 0.00141491
Iteration 22, loss = 0.00115790
Iteration 23, loss = 0.00127703
Iteration 24, loss = 0.00111064
Iteration 25, loss = 0.00109803
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.

The plot shows the model can achieve around 0.8 recall as well as keep the precision around 0.8. That measns the model can detect around 80% of frauds without interfering a lot of customers. However, the curves drop down rapidly when recall is large than 0.8. So if you want to increase the recall above 80%, the pay-off is very expensive as a great number of customers being inspected but only gains a few more frauds being found.

Predict the test set.

In [25]:
scaler = StandardScaler() 
scaler.fit(X_train) 
X_train_scaled = scaler.transform(X_train)  
X_test_scaled = scaler.transform(X_test)




probas_ = clf.fit(X_train_scaled, y_train).predict_proba(X_test_scaled)[:, 1]
y_score=clf.predict_proba(X_test_scaled)[:,1]

#precision, recall, thresholds = precision_recall_curve(y_test, probas_)
average_precision = average_precision_score(y_test, probas_)




predict=(probas_>0.5).astype(int)
            
f1score=f1_score(y_test,predict)   
f2score=fbeta_score(y_test,predict,beta=2)

print("Ap= ",average_precision)        
print("f1 score: ",f1score)
print("f2 score: ",f2score)
print(metrics.classification_report(y_test,predict))
print(metrics.confusion_matrix(y_test,predict))
Iteration 1, loss = 0.00656648
Iteration 2, loss = 0.00328877
Iteration 3, loss = 0.00309144
Iteration 4, loss = 0.00298536
Iteration 5, loss = 0.00283384
Iteration 6, loss = 0.00265084
Iteration 7, loss = 0.00271564
Iteration 8, loss = 0.00238076
Iteration 9, loss = 0.00238580
Iteration 10, loss = 0.00216532
Iteration 11, loss = 0.00209388
Iteration 12, loss = 0.00206604
Iteration 13, loss = 0.00190639
Iteration 14, loss = 0.00182021
Iteration 15, loss = 0.00176023
Iteration 16, loss = 0.00173838
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
Ap=  0.791515075084
f1 score:  0.773869346734
f2 score:  0.78093306288
             precision    recall  f1-score   support

          0       1.00      1.00      1.00     56864
          1       0.76      0.79      0.77        98

avg / total       1.00      1.00      1.00     56962

[[56840    24]
 [   21    77]]