{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#try the stacking method in this link\n", "#https://www.kaggle.com/code/arthurtok/introduction-to-ensembling-stacking-in-python#Generating-our-Base-First-Level-Models" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from wcds import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Sex | \n", "Length | \n", "Diameter | \n", "Height | \n", "Whole weight | \n", "Whole weight.1 | \n", "Whole weight.2 | \n", "Shell weight | \n", "Rings | \n", "
---|---|---|---|---|---|---|---|---|---|
id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
0 | \n", "F | \n", "0.550 | \n", "0.430 | \n", "0.150 | \n", "0.7715 | \n", "0.3285 | \n", "0.1465 | \n", "0.2400 | \n", "11 | \n", "
1 | \n", "F | \n", "0.630 | \n", "0.490 | \n", "0.145 | \n", "1.1300 | \n", "0.4580 | \n", "0.2765 | \n", "0.3200 | \n", "11 | \n", "
2 | \n", "I | \n", "0.160 | \n", "0.110 | \n", "0.025 | \n", "0.0210 | \n", "0.0055 | \n", "0.0030 | \n", "0.0050 | \n", "6 | \n", "
3 | \n", "M | \n", "0.595 | \n", "0.475 | \n", "0.150 | \n", "0.9145 | \n", "0.3755 | \n", "0.2055 | \n", "0.2500 | \n", "10 | \n", "
4 | \n", "I | \n", "0.555 | \n", "0.425 | \n", "0.130 | \n", "0.7820 | \n", "0.3695 | \n", "0.1600 | \n", "0.1975 | \n", "9 | \n", "
\n", " | Sex | \n", "Length | \n", "Diameter | \n", "Height | \n", "Whole weight | \n", "Whole weight.1 | \n", "Whole weight.2 | \n", "Shell weight | \n", "
---|---|---|---|---|---|---|---|---|
id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
90615 | \n", "M | \n", "0.645 | \n", "0.475 | \n", "0.155 | \n", "1.2380 | \n", "0.6185 | \n", "0.3125 | \n", "0.3005 | \n", "
90616 | \n", "M | \n", "0.580 | \n", "0.460 | \n", "0.160 | \n", "0.9830 | \n", "0.4785 | \n", "0.2195 | \n", "0.2750 | \n", "
90617 | \n", "M | \n", "0.560 | \n", "0.420 | \n", "0.140 | \n", "0.8395 | \n", "0.3525 | \n", "0.1845 | \n", "0.2405 | \n", "
90618 | \n", "M | \n", "0.570 | \n", "0.490 | \n", "0.145 | \n", "0.8740 | \n", "0.3525 | \n", "0.1865 | \n", "0.2350 | \n", "
90619 | \n", "I | \n", "0.415 | \n", "0.325 | \n", "0.110 | \n", "0.3580 | \n", "0.1575 | \n", "0.0670 | \n", "0.1050 | \n", "
\n", " | Sex | \n", "Length | \n", "Diameter | \n", "Height | \n", "Whole weight | \n", "Shucked weight | \n", "Viscera weight | \n", "Shell weight | \n", "Rings | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "M | \n", "0.455 | \n", "0.365 | \n", "0.095 | \n", "0.5140 | \n", "0.2245 | \n", "0.1010 | \n", "0.150 | \n", "15 | \n", "
1 | \n", "M | \n", "0.350 | \n", "0.265 | \n", "0.090 | \n", "0.2255 | \n", "0.0995 | \n", "0.0485 | \n", "0.070 | \n", "7 | \n", "
2 | \n", "F | \n", "0.530 | \n", "0.420 | \n", "0.135 | \n", "0.6770 | \n", "0.2565 | \n", "0.1415 | \n", "0.210 | \n", "9 | \n", "
3 | \n", "M | \n", "0.440 | \n", "0.365 | \n", "0.125 | \n", "0.5160 | \n", "0.2155 | \n", "0.1140 | \n", "0.155 | \n", "10 | \n", "
4 | \n", "I | \n", "0.330 | \n", "0.255 | \n", "0.080 | \n", "0.2050 | \n", "0.0895 | \n", "0.0395 | \n", "0.055 | \n", "7 | \n", "
Pipeline(steps=[('add feature',\n", " FunctionTransformer(func=<function add_feature at 0x0000023020B6C7C0>,\n", " kw_args={'dimension_ratio_flag': 0,\n", " 'max_ratio': 1,\n", " 'meat_shell_flag': 0,\n", " 'proportional_weight_flag': 0,\n", " 'shell_surface_flag': 0,\n", " 'shell_volume_flag': 0})),\n", " ('preprocess',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('excludecolumnselector',\n", " ExcludeC...\n", " gpu_id=None, grow_policy=None,\n", " importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=0.00530171011345918, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=18,\n", " max_leaves=None, min_child_weight=13, missing=nan,\n", " monotone_constraints=None, n_estimators=831,\n", " n_jobs=None, num_parallel_tree=None,\n", " predictor=None, random_state=None, ...))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('add feature',\n", " FunctionTransformer(func=<function add_feature at 0x0000023020B6C7C0>,\n", " kw_args={'dimension_ratio_flag': 0,\n", " 'max_ratio': 1,\n", " 'meat_shell_flag': 0,\n", " 'proportional_weight_flag': 0,\n", " 'shell_surface_flag': 0,\n", " 'shell_volume_flag': 0})),\n", " ('preprocess',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('excludecolumnselector',\n", " ExcludeC...\n", " gpu_id=None, grow_policy=None,\n", " importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=0.00530171011345918, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=18,\n", " max_leaves=None, min_child_weight=13, missing=nan,\n", " monotone_constraints=None, n_estimators=831,\n", " n_jobs=None, num_parallel_tree=None,\n", " predictor=None, random_state=None, ...))])
FunctionTransformer(func=<function add_feature at 0x0000023020B6C7C0>,\n", " kw_args={'dimension_ratio_flag': 0, 'max_ratio': 1,\n", " 'meat_shell_flag': 0,\n", " 'proportional_weight_flag': 0,\n", " 'shell_surface_flag': 0, 'shell_volume_flag': 0})
FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('excludecolumnselector',\n", " ExcludeColumnSelector(subset=['Sex'])),\n", " ('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('pipeline_winsorize',\n", " <wcds.tools._pipeline.Pipeline_winsorize object at 0x000002302AC72490>),\n", " ('standardscaler',\n", " StandardScaler()),\n", " ('pipeline_drop_correlated_features',\n", " Pipeline_Drop_correlated_features(threshold=0.99))])),\n", " ('pipeline-2',\n", " Pipeline(steps=[('includecolumnselector',\n", " IncludeColumnSelector(subset=['Sex'])),\n", " ('simpleimputer',\n", " SimpleImputer(fill_value='Missing',\n", " strategy='constant')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore',\n", " sparse_output=False))]))])
ExcludeColumnSelector(subset=['Sex'])
SimpleImputer(strategy='median')
<wcds.tools._pipeline.Pipeline_winsorize object at 0x000002302AC72490>
StandardScaler()
Pipeline_Drop_correlated_features(threshold=0.99)
IncludeColumnSelector(subset=['Sex'])
SimpleImputer(fill_value='Missing', strategy='constant')
OneHotEncoder(handle_unknown='ignore', sparse_output=False)
passthrough
passthrough
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=0.5855306249815994, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=0.5105516847527882, gpu_id=None, grow_policy=None,\n", " importance_type=None, interaction_constraints=None,\n", " learning_rate=0.00530171011345918, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=18, max_leaves=None,\n", " min_child_weight=13, missing=nan, monotone_constraints=None,\n", " n_estimators=831, n_jobs=None, num_parallel_tree=None,\n", " predictor=None, random_state=None, ...)
\n", " | Sex | \n", "Length | \n", "Diameter | \n", "Height | \n", "Whole weight | \n", "Whole weight.1 | \n", "Whole weight.2 | \n", "Shell weight | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "F | \n", "0.550 | \n", "0.430 | \n", "0.150 | \n", "0.7715 | \n", "0.3285 | \n", "0.1465 | \n", "0.240 | \n", "
2 | \n", "I | \n", "0.160 | \n", "0.110 | \n", "0.025 | \n", "0.0210 | \n", "0.0055 | \n", "0.0030 | \n", "0.005 | \n", "
3 | \n", "M | \n", "0.595 | \n", "0.475 | \n", "0.150 | \n", "0.9145 | \n", "0.3755 | \n", "0.2055 | \n", "0.250 | \n", "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=0.777126613189779, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=0.3484025318375001, gpu_id=None, grow_policy=None,\n", " importance_type=None, interaction_constraints=None,\n", " learning_rate=0.007020575494601785, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=4, max_leaves=None,\n", " min_child_weight=7, missing=nan, monotone_constraints=None,\n", " n_estimators=650, n_jobs=None, num_parallel_tree=None,\n", " predictor=None, random_state=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=0.777126613189779, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=0.3484025318375001, gpu_id=None, grow_policy=None,\n", " importance_type=None, interaction_constraints=None,\n", " learning_rate=0.007020575494601785, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=4, max_leaves=None,\n", " min_child_weight=7, missing=nan, monotone_constraints=None,\n", " n_estimators=650, n_jobs=None, num_parallel_tree=None,\n", " predictor=None, random_state=None, ...)