Resource Efficiency on Circular Knitting Machines

This notebook demonstrates two simple examples of using gathered sensor data to examine the resource efficiency of a circular knitting machine.

The measured signals contain the cam temperature, cam force, yarn tension, drive power and vibrations, measured at different rotational velocities.

In the two examples, we examine the difference between worn and less worn needle feet, as well as the difference in machine state at different lubricant amounts. We use preprocessed data containing aggregated values per machine rotation and rely heavily on existing Python frameworks for data processing and modeling.

We start by importing the necessary modules.
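The exact import list depends on the original notebook; a plausible set covering both applications (with imbalanced-learn assumed to be installed for Application 2) is:

```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from sklearn.tree import plot_tree

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
```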

Application 1: Needle Wear

First, we load the previously saved data from a pickle container into a pandas dataframe.
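A sketch of the loading step, assuming a hypothetical file name `needle_wear.pkl` (the actual path is not given here):

```python
# Hypothetical file name; adjust to the actual pickle container
df = pd.read_pickle("needle_wear.pkl")
```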

We display some basic information: the data itself, the data types, NaN values and simple statistics. Here, we focus on the vibration signals, observing the maximum and RMS values per rotation for all spatial directions. The speed in these experiments was reduced to 10 rpm. Two measurements were conducted per needle state.
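For example, using the standard pandas inspection methods:

```python
display(df.head())       # the data itself
df.info()                # data types and non-null counts (reveals NaNs)
display(df.describe())   # simple statistics per column
```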

Data points were labeled with False for less worn and True for worn needles.

How many data points are labeled True?
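Assuming the boolean label column is named `worn` (an assumption, as the actual column name is not given), a quick count could be:

```python
# "worn" is a hypothetical column name for the boolean label
print(df["worn"].sum(), "of", len(df), "data points are labeled True")
```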

Due to similar orders of magnitude, there is no need to standardize the data. All data points are treated as independent observations. As an example, we first plot the two y-direction features against each other.
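A sketch of the scatter plot, with hypothetical column names `max_y` and `rms_y` for the per-rotation maximum and RMS of the y vibration signal:

```python
# Scatter plot of the two y-direction features, colored by needle state
fig, ax = plt.subplots()
for label, group in df.groupby("worn"):
    ax.scatter(group["max_y"], group["rms_y"], label=f"worn = {label}", alpha=0.5)
ax.set_xlabel("max_y")
ax.set_ylabel("rms_y")
ax.legend()
plt.show()
```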

It appears that the y features alone might separate the output categories sufficiently. Let's also check the box plots.
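For instance, with the same assumed column names extended to all three spatial directions:

```python
# Box plots of all vibration features, grouped by needle state
# (column names are assumptions)
features = ["max_x", "max_y", "max_z", "rms_x", "rms_y", "rms_z"]
df.boxplot(column=features, by="worn", layout=(2, 3), figsize=(12, 6))
plt.show()
```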

The box plots indicate class separability for the x and y directions. Let's define some simple models and check their performance.

First things first: we separate the input columns from the output and set 30 % of the data points aside as a test set.
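A sketch using scikit-learn's `train_test_split` (the random seed is an arbitrary choice):

```python
X = df[features]   # input features as defined above
y = df["worn"]     # boolean target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
```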

We define three classification models: logistic regression, K-nearest neighbors and support vector machine.

Logistic regression fits a logistic function $ f(x) = \frac{L}{1 + e^{-k(x - x_0)}} $ to the data to predict a binary variable (in this case, the degree of needle wear).

K-nearest neighbors assigns a data point to the class that is most common among its K nearest neighboring points with respect to a predefined metric.

A support vector machine tries to find a hyperplane that maximizes the margin between the categories.
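With scikit-learn, the three models could be instantiated as follows (the hyperparameters shown are library defaults, except for an increased iteration limit; the values used in the original experiments are not given):

```python
models = {
    "Logistic regression": LogisticRegression(max_iter=1000),
    "K nearest neighbors": KNeighborsClassifier(n_neighbors=5),
    "Support vector machine": SVC(kernel="rbf"),
}
```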

During training, we use 10-fold cross-validation to obtain a reliable performance estimate and guard against overfitting. We print the mean and standard deviation of each model's score.
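A sketch of the cross-validation loop:

```python
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

cv_scores = {}
for name, model in models.items():
    scores = cross_val_score(model, X_train, y_train, cv=cv)
    cv_scores[name] = scores
    print(f"{name}: {scores.mean():.3f} +/- {scores.std():.3f}")
```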

All three models achieve classification precision well above 95 %. We can also visualize their performance across all folds.
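For example, as a box plot of the fold scores collected above:

```python
# One box per model, summarizing the ten fold scores
fig, ax = plt.subplots()
ax.boxplot(list(cv_scores.values()), labels=list(cv_scores.keys()))
ax.set_ylabel("cross-validation score")
plt.show()
```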

We fit the models with the whole training set again and compute predictions on the test set. Finally, we print the classification report and visualize confusion matrices for the test set.
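A sketch of the final evaluation step:

```python
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"--- {name} ---")
    print(classification_report(y_test, y_pred))
    ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
    plt.title(name)
    plt.show()
```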

On the test set, all models achieve at least 95 % precision. We can conclude that the models generalize well without overfitting.

Application 2: Lubricant Amount

Several experiments were conducted with three different lubricant amounts: 4, 20 and 40 ml/hour/nozzle. The preprocessed data contain aggregated quantities per rotation at 20 rpm. Again, we first load the data and display basic information.
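Analogous to Application 1, with a hypothetical file name:

```python
# Hypothetical file name; adjust to the actual pickle container
df = pd.read_pickle("lubricant_amount.pkl")

display(df.head())
df.info()
display(df.describe())
```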

Experiments showed that the quantity of 4 ml/hour/nozzle appears to be too low, occasionally leading to higher friction and wear on needles and cams. On the other hand, there are no significant differences in machine behavior at 20 and 40 ml/hour/nozzle. Therefore, we define all data points with 4 ml/hour/nozzle as True (lubricant amount too low) and all other data points as False. We arbitrarily set the threshold at 10 ml/hour/nozzle; note that specifying this threshold reliably would require further experiments.
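Assuming the lubricant amount is stored in a column named `lubricant_amount` (a hypothetical name), the labeling could read:

```python
# "lubricant_amount" in ml/hour/nozzle is an assumed column name;
# the 10 ml/hour/nozzle threshold is set arbitrarily
df["too_low"] = df["lubricant_amount"] < 10
```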

By performing PCA on the standardized data set and plotting the first three components, we get an impression of how well the two classes can be separated. Note that this serves visualization only: standardizing and fitting PCA on the full data set before the train/test split would introduce a data leak into any model built on top of it.
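A sketch of the visualization, assuming all columns apart from the lubricant amount and the label are features:

```python
# All remaining columns are assumed to be features
feature_cols = df.columns.drop(["lubricant_amount", "too_low"])
X_scaled = StandardScaler().fit_transform(df[feature_cols])
components = PCA(n_components=3).fit_transform(X_scaled)

fig = plt.figure()
ax = fig.add_subplot(projection="3d")
for label in (False, True):
    mask = (df["too_low"] == label).to_numpy()
    ax.scatter(components[mask, 0], components[mask, 1], components[mask, 2],
               label=f"too low = {label}", alpha=0.5)
ax.set_xlabel("PC 1")
ax.set_ylabel("PC 2")
ax.set_zlabel("PC 3")
ax.legend()
plt.show()
```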

The first three PCA components reveal some clustering; apart from outliers, the two classes lie close together but remain distinguishable. We first separate the data points from the labels and extract the column names.
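Reusing `feature_cols` from the PCA cell above:

```python
X = df[feature_cols]
y = df["too_low"]
feature_names = list(X.columns)
```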

To obtain a good classification result without scaling, we can use random forests. However, random forests are sensitive to class imbalance, which appears to be present here. A quick check confirms it.
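For example:

```python
# Relative class frequencies
print(y.value_counts(normalize=True))
```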

Only about 15 % of the data points are labeled True. To compensate for this, we oversample the minority class with the SMOTE technique and undersample the majority class.

First, we oversample the minority class to half the size of the majority class; then we undersample the majority class to obtain an equal class distribution.
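With imbalanced-learn, the two resampling steps could be configured as follows (`sampling_strategy` is the desired minority-to-majority ratio):

```python
# Oversample the minority class to half the majority size,
# then undersample the majority class to a 1:1 distribution
oversampler = SMOTE(sampling_strategy=0.5, random_state=42)
undersampler = RandomUnderSampler(sampling_strategy=1.0, random_state=42)
```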

Next, we separate the training and test sets, define the model and pack all steps into a Pipeline, which takes care of applying the operations in the correct order. Finally, we define a 10-fold cross-validation method.
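A sketch using the imbalanced-learn `Pipeline`, which applies the resampling steps only during fitting:

```python
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

model = Pipeline(steps=[
    ("oversample", oversampler),
    ("undersample", undersampler),
    ("forest", RandomForestClassifier(random_state=42)),
])

cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
```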

Finally, we train the model, predict the labels on the test set and display some performance metrics, including the confusion matrix. Training should finish within a few seconds.
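For example:

```python
# Cross-validated score on the training set, then final evaluation
scores = cross_val_score(model, X_train, y_train, cv=cv)
print(f"Cross-validation score: {scores.mean():.3f} +/- {scores.std():.3f}")

model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
plt.show()
```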

The model achieves excellent performance with only one false negative. As a peek into the background, we plot three of the decision trees in the random forest to see which features contributed to individual decisions.
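A sketch using scikit-learn's `plot_tree`, with the depth limited so the plots stay readable:

```python
# Inspect three of the trees in the fitted forest
forest = model.named_steps["forest"]
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
for ax, tree in zip(axes, forest.estimators_[:3]):
    plot_tree(tree, ax=ax, max_depth=2, feature_names=feature_names,
              class_names=["ok", "too low"], filled=True)
plt.show()
```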

By traversing each tree from the root to the leaves, the criteria for class separation can be examined. The prediction of the whole model is obtained by voting, i.e. the class assigned by the majority of trees is accepted.

We check which features contribute most to the model.
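Using the impurity-based feature importances of the fitted forest:

```python
# Bar chart of feature importances, sorted for readability
importances = pd.Series(forest.feature_importances_, index=feature_names)
importances.sort_values().plot.barh()
plt.xlabel("feature importance")
plt.show()
```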

Apparently, in this case, the y features are the most relevant for the model.