Table of Contents

AI machine learning

see also:

Introduction

basic steps

Python libraries for AI

Machine learning with Jupyter, sklearn and Pandas

basic code

import pandas as pd
from sklearn.tree import DecisionTreeClassifier  # (if this is the model you wish to use)
from sklearn.model_selection import train_test_split  # only needed when training model
from sklearn.metrics import accuracy_score  # only needed when evaluating model
import joblib  # needed to save the trained model (sklearn.externals.joblib was removed in scikit-learn 0.23; use the standalone joblib package)
from sklearn import tree  # only needed to visualise the model tree
df = pd.read_csv('csv_filename')  # imports the csv data file into a pandas dataframe variable df - evaluating df in a Jupyter cell displays the data as a table
df.shape  # outputs a (number of rows, number of columns) tuple
df.describe()  # gives the data statistics of each numeric column - count, mean, std, min, 25%, 50%, 75%, max
df.values  # displays the data as an array (values is an attribute, not a method - no parentheses)

split the data into train and test parts

X = df.drop(columns=['output_columnname'])  # creates a new dataset X with that column removed (by convention the input dataset is given a capitalized name)
y = df['output_columnname']  # creates a new dataset y with only the output column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)  # split your data into training and test parts, in this case 20% of data will be used in testing phase
model = DecisionTreeClassifier()  # create your model type

now train, view and save your model

model.fit(X_train, y_train)  # trains the model
# optionally save a graphic display of the trained decision tree as a Graphviz .dot file
# feature_names must list the training columns in the order the model saw them, so take them from X;
# class_names must be strings in ascending order of the class labels
tree.export_graphviz(model, out_file='chartfilename.dot', feature_names=list(X.columns), class_names=[str(label) for label in sorted(y.unique())], label='all', rounded=True, filled=True)
joblib.dump(model, 'saved_model_filename.joblib')  # to save the trained model

now evaluate model using your test dataset:

# ask the trained model for its output on the held-back test inputs
predictions = model.predict(X_test)
predictions  # in Jupyter this shows the array of predicted values
# fraction of test rows where the prediction matches the real output
score = accuracy_score(y_true=y_test, y_pred=predictions)
score  # in Jupyter this shows the accuracy score

later, you can load your saved model and use it to create predictions without needing to re-train it

import pandas as pd
from sklearn.tree import DecisionTreeClassifier  # (if this is the model you wish to use)
import joblib  # needed to load or save the trained model (sklearn.externals.joblib was removed in scikit-learn 0.23; use the standalone joblib package)

# NOTE: joblib files are pickle-based - only load model files that come from a source you trust
model = joblib.load('saved_model_filename.joblib')  # to load the trained model
predictions = model.predict(your_new_dataset_array_to_analyse)
predictions  # to display the predictions as an array of predictions
1)
2,10