%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


import problem


# Load the training split through the local ``problem`` module.
X_train, y_train = problem.get_train_data()


# Preview the first rows of the feature table.
X_train.head()


# Preview the first five labels.
y_train[:5]

array(['setosa', 'versicolor', 'versicolor', 'virginica', 'versicolor'],
      dtype=object)


# Attach the labels to the training features so the pair plot can be colored
# by class. ``assign`` places the ``y_train`` array positionally, so each row
# keeps its own label whatever index ``X_train`` carries. Concatenating a
# freshly built label frame along axis=1 would instead align on index and
# yield NaN-padded, mispaired rows if that index were not the default 0..n-1.
df_train = X_train.assign(species=y_train)
sns.pairplot(df_train, hue="species")

<seaborn.axisgrid.PairGrid at 0x7f04e6e35650>


# Load the held-out split through the local ``problem`` module.
X_test, y_test = problem.get_test_data()
# Attach the labels positionally rather than through an index-aligned concat,
# so features and labels stay paired even if ``X_test`` has a non-default
# index. NOTE(review): assumes ``y_test`` is a plain array like ``y_train``.
df_test = X_test.assign(species=y_test)
sns.pairplot(df_test, hue="species")

<seaborn.axisgrid.PairGrid at 0x7f04e6200450>


from sklearn.ensemble import RandomForestClassifier


def get_estimator():
    """Return a new, unfitted random-forest classifier.

    The forest uses 5 trees, each limited to depth 5, with a fixed
    ``random_state`` of 61.
    """
    forest_params = {"n_estimators": 5, "max_depth": 5, "random_state": 61}
    return RandomForestClassifier(**forest_params)


from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedShuffleSplit

# Two stratified shuffle splits, each holding out 20% of the training rows,
# seeded with a fixed random_state.
cv = StratifiedShuffleSplit(n_splits=2, test_size=0.2, random_state=57)
# Score a freshly built estimator once per split.
cross_val_score(estimator=get_estimator(), X=X_train, y=y_train, cv=cv)

array([0.95, 0.95])


# Train on the full training split. ``fit`` returns the estimator itself, so
# chaining leaves ``model`` bound to the fitted classifier.
model = get_estimator().fit(X_train, y_train)
# Score the fitted model on the held-out test split.
model.score(X_test, y_test)

0.875

	sepal length	sepal width	petal length	petal width
0	5.0	3.5	1.6	0.6
1	6.1	2.8	4.7	1.2
2	6.8	2.8	4.8	1.4
3	6.7	3.1	5.6	2.4
4	5.0	2.0	3.5	1.0

Paris Saclay Center for Data Science¶

Test RAMP on iris

Introduction¶

Explore the dataset¶

Building predictive models¶

Submission¶

Contact¶

	sepal length	sepal width	petal length	petal width
0	5.0	3.5	1.6	0.6
1	6.1	2.8	4.7	1.2
2	6.8	2.8	4.8	1.4
3	6.7	3.1	5.6	2.4
4	5.0	2.0	3.5	1.0

	sepal length	sepal width	petal length	petal width
0	5.0	3.5	1.6	0.6
1	6.1	2.8	4.7	1.2
2	6.8	2.8	4.8	1.4
3	6.7	3.1	5.6	2.4
4	5.0	2.0	3.5	1.0

Paris Saclay Center for Data Science¶

Test RAMP on iris

Introduction¶

Explore the dataset¶

Building predictive models¶

Submission¶

Contact¶

Paris Saclay Center for Data Science¶

	sepal length	sepal width	petal length	petal width
0	5.0	3.5	1.6	0.6
1	6.1	2.8	4.7	1.2
2	6.8	2.8	4.8	1.4
3	6.7	3.1	5.6	2.4
4	5.0	2.0	3.5	1.0