I’ve trained an SVM classifier on my data, looked at the coefficients, and then plotted the most important training feature against my target class. However, the plot shows no visible dependency between the two (the x axis is the class, the y axis is the most important feature). I trained the SVM on whether the class value is 0 or greater than 0, but the image shows all class values in my data set.
My code:
<code>import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics, svm, preprocessing
from sklearn.model_selection import train_test_split
# Train a linear SVM on `df`, plot the most influential feature weights, and
# scatter one chosen feature against the target column.

# Unscaled target and feature columns, used only for the scatter plot at the end.
targetRaw = df[ target ]
correlationRaw = df[ mostImportantFeature ]

# NOTE(review): the model is fitted on the raw target column. If the intent is a
# binary "0 vs. greater than 0" classifier, binarize `y` before fitting; with more
# than two classes `svc.coef_[ 0 ]` below only describes the first pair of classes
# -- confirm against the intended setup.
y = df[ target ]
X = df.drop( columns=[ target ] )
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.4 )

# Learn the scaling statistics from the training split only and reuse them for the
# test split. Re-fitting on the test data would scale the two splits differently
# and leak test-set statistics into the evaluation.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform( X_train )
X_test = scaler.transform( X_test )

svc = svm.SVC( kernel='linear' )
svc.fit( X_train, y_train )
y_pred = svc.predict( X_test )
print( "Testing Accuracy:", metrics.accuracy_score( y_test, y_pred ) )

# Number of features shown in the bar chart.
coefficients = 15

# Rank features by the magnitude of their weight: a strongly negative coefficient
# is as influential as a strongly positive one. The bars keep the signed value so
# the direction of the effect stays visible.
imp, names = zip( *sorted( zip( svc.coef_[ 0 ], X.columns.values ), key=lambda pair: abs( pair[ 0 ] ) ) )
topImp = imp[ -coefficients: ]
topNames = names[ -coefficients: ]
barPositions = range( len( topNames ) )

plt.figure( 0 )
plt.xlabel( "Attribution" )
plt.ylabel( "Input Features" )
plt.barh( barPositions, topImp, align='center' )
plt.yticks( barPositions, topNames )
plt.savefig( 'SVMCoefficientsAttribution.png', bbox_inches='tight', dpi=100 )

# Raw (unscaled) feature values against the raw target values.
plt.figure( 1 )
plt.xlabel( target )
plt.ylabel( "Feature" )
plt.scatter( targetRaw.tolist(), correlationRaw.tolist(), s=4.0 )
plt.savefig( 'SVMTargetFeatureCorrelation.png', bbox_inches='tight', dpi=100 )
</code>
<code>import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics, svm, preprocessing
from sklearn.model_selection import train_test_split
# Train a linear SVM on `df`, plot the most influential feature weights, and
# scatter one chosen feature against the target column.

# Unscaled target and feature columns, used only for the scatter plot at the end.
targetRaw = df[ target ]
correlationRaw = df[ mostImportantFeature ]

# NOTE(review): the model is fitted on the raw target column. If the intent is a
# binary "0 vs. greater than 0" classifier, binarize `y` before fitting; with more
# than two classes `svc.coef_[ 0 ]` below only describes the first pair of classes
# -- confirm against the intended setup.
y = df[ target ]
X = df.drop( columns=[ target ] )
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.4 )

# Learn the scaling statistics from the training split only and reuse them for the
# test split. Re-fitting on the test data would scale the two splits differently
# and leak test-set statistics into the evaluation.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform( X_train )
X_test = scaler.transform( X_test )

svc = svm.SVC( kernel='linear' )
svc.fit( X_train, y_train )
y_pred = svc.predict( X_test )
print( "Testing Accuracy:", metrics.accuracy_score( y_test, y_pred ) )

# Number of features shown in the bar chart.
coefficients = 15

# Rank features by the magnitude of their weight: a strongly negative coefficient
# is as influential as a strongly positive one. The bars keep the signed value so
# the direction of the effect stays visible.
imp, names = zip( *sorted( zip( svc.coef_[ 0 ], X.columns.values ), key=lambda pair: abs( pair[ 0 ] ) ) )
topImp = imp[ -coefficients: ]
topNames = names[ -coefficients: ]
barPositions = range( len( topNames ) )

plt.figure( 0 )
plt.xlabel( "Attribution" )
plt.ylabel( "Input Features" )
plt.barh( barPositions, topImp, align='center' )
plt.yticks( barPositions, topNames )
plt.savefig( 'SVMCoefficientsAttribution.png', bbox_inches='tight', dpi=100 )

# Raw (unscaled) feature values against the raw target values.
plt.figure( 1 )
plt.xlabel( target )
plt.ylabel( "Feature" )
plt.scatter( targetRaw.tolist(), correlationRaw.tolist(), s=4.0 )
plt.savefig( 'SVMTargetFeatureCorrelation.png', bbox_inches='tight', dpi=100 )
</code>
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics, svm, preprocessing
from sklearn.model_selection import train_test_split
# Train a linear SVM on `df`, plot the most influential feature weights, and
# scatter one chosen feature against the target column.

# Unscaled target and feature columns, used only for the scatter plot at the end.
targetRaw = df[ target ]
correlationRaw = df[ mostImportantFeature ]

# NOTE(review): the model is fitted on the raw target column. If the intent is a
# binary "0 vs. greater than 0" classifier, binarize `y` before fitting; with more
# than two classes `svc.coef_[ 0 ]` below only describes the first pair of classes
# -- confirm against the intended setup.
y = df[ target ]
X = df.drop( columns=[ target ] )
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.4 )

# Learn the scaling statistics from the training split only and reuse them for the
# test split. Re-fitting on the test data would scale the two splits differently
# and leak test-set statistics into the evaluation.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform( X_train )
X_test = scaler.transform( X_test )

svc = svm.SVC( kernel='linear' )
svc.fit( X_train, y_train )
y_pred = svc.predict( X_test )
print( "Testing Accuracy:", metrics.accuracy_score( y_test, y_pred ) )

# Number of features shown in the bar chart.
coefficients = 15

# Rank features by the magnitude of their weight: a strongly negative coefficient
# is as influential as a strongly positive one. The bars keep the signed value so
# the direction of the effect stays visible.
imp, names = zip( *sorted( zip( svc.coef_[ 0 ], X.columns.values ), key=lambda pair: abs( pair[ 0 ] ) ) )
topImp = imp[ -coefficients: ]
topNames = names[ -coefficients: ]
barPositions = range( len( topNames ) )

plt.figure( 0 )
plt.xlabel( "Attribution" )
plt.ylabel( "Input Features" )
plt.barh( barPositions, topImp, align='center' )
plt.yticks( barPositions, topNames )
plt.savefig( 'SVMCoefficientsAttribution.png', bbox_inches='tight', dpi=100 )

# Raw (unscaled) feature values against the raw target values.
plt.figure( 1 )
plt.xlabel( target )
plt.ylabel( "Feature" )
plt.scatter( targetRaw.tolist(), correlationRaw.tolist(), s=4.0 )
plt.savefig( 'SVMTargetFeatureCorrelation.png', bbox_inches='tight', dpi=100 )