import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

# Show every column when a DataFrame is displayed (no column truncation)
pd.options.display.max_columns = None

# Cap the number of rows shown for a displayed DataFrame at 200
pd.options.display.max_rows = 200

# To build models for prediction
# NOTE: the importable package is `sklearn`; "Scikit-learn" is only the project
# name and is not a valid Python module identifier (the hyphen is a syntax error).
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,BaggingRegressor

# To encode categorical variables
from sklearn.preprocessing import LabelEncoder

# For tuning the model
from sklearn.model_selection import GridSearchCV

# To check model performance
from sklearn.metrics import make_scorer,mean_squared_error, r2_score, mean_absolute_error

# Mount Google Drive inside the Colab runtime at /content/Mydrive so that the
# dataset stored on Drive can be read through the local filesystem
from google.colab import drive
drive.mount('/content/Mydrive')

Drive already mounted at /content/Mydrive; to attempt to forcibly remount, call drive.mount("/content/Mydrive", force_remount=True).

# Load the healthcare dataset from the mounted Drive folder
data = pd.read_csv(filepath_or_buffer="/content/Mydrive/MyDrive/healthcare_data_1.csv")

# Keep an untouched deep copy so later transformations of `data` do not affect it
same_data = data.copy(deep=True)

# Peek at the first 5 rows
data.head(n=5)

# Peek at the last 5 rows
data.tail(n=5)

# (rows, columns) of the dataset
data.shape

(500000, 15)

# Checking the info of the data: column names, non-null counts, dtypes and memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500000 entries, 0 to 499999
Data columns (total 15 columns):
 #   Column                             Non-Null Count   Dtype  
---  ------                             --------------   -----  
 0   Available Extra Rooms in Hospital  500000 non-null  int64  
 1   Department                         500000 non-null  object 
 2   Ward_Facility_Code                 500000 non-null  object 
 3   doctor_name                        500000 non-null  object 
 4   staff_available                    500000 non-null  int64  
 5   patientid                          500000 non-null  int64  
 6   Age                                500000 non-null  object 
 7   gender                             500000 non-null  object 
 8   Type of Admission                  500000 non-null  object 
 9   Severity of Illness                500000 non-null  object 
 10  health_conditions                  348112 non-null  object 
 11  Visitors with Patient              500000 non-null  int64  
 12  Insurance                          500000 non-null  object 
 13  Admission_Deposit                  500000 non-null  float64
 14  Stay (in days)                     500000 non-null  int64  
dtypes: float64(1), int64(5), object(9)
memory usage: 57.2+ MB

# To view patientid and the number of times they have been admitted to the hospital
# (one row per distinct patientid, ordered from most to least frequent)
data['patientid'].value_counts()

patientid
126719    21
125695    21
44572     21
126623    21
125625    19
          ..
37634      1
91436      1
118936     1
52366      1
105506     1
Name: count, Length: 126399, dtype: int64

# patientid is only an identifier, so it is removed before the analysis
data = data.drop("patientid", axis=1)

# Number of fully duplicated rows remaining in the data
data.duplicated().sum()

0

# Descriptive statistics of the numeric columns, one row per column
data.describe().transpose()

# Categorical variables whose distributions are inspected below
cat_col = [
    "Department",
    "Type of Admission",
    'Severity of Illness',
    'gender',
    'Insurance',
    'health_conditions',
    'doctor_name',
    "Ward_Facility_Code",
    "Age",
]

# Print the relative frequency of every level of each categorical column
for column in cat_col:
    proportions = data[column].value_counts(normalize=True)
    print(proportions)
    print("-" * 50)

Department
gynecology            0.686956
radiotherapy          0.168630
anesthesia            0.088358
TB & Chest disease    0.045780
surgery               0.010276
Name: proportion, dtype: float64
--------------------------------------------------
Type of Admission
Trauma       0.621072
Emergency    0.271568
Urgent       0.107360
Name: proportion, dtype: float64
--------------------------------------------------
Severity of Illness
Moderate    0.560394
Minor       0.263074
Extreme     0.176532
Name: proportion, dtype: float64
--------------------------------------------------
gender
Female    0.74162
Male      0.20696
Other     0.05142
Name: proportion, dtype: float64
--------------------------------------------------
Insurance
Yes    0.78592
No     0.21408
Name: proportion, dtype: float64
--------------------------------------------------
health_conditions
Other                  0.271209
High Blood Pressure    0.228093
Diabetes               0.211553
Asthama                0.188198
Heart disease          0.100947
Name: proportion, dtype: float64
--------------------------------------------------
doctor_name
Dr Sarah     0.199192
Dr Olivia    0.196704
Dr Sophia    0.149506
Dr Nathan    0.141554
Dr Sam       0.111422
Dr John      0.102526
Dr Mark      0.088820
Dr Isaac     0.006718
Dr Simon     0.003558
Name: proportion, dtype: float64
--------------------------------------------------
Ward_Facility_Code
F    0.241076
D    0.238110
B    0.207770
E    0.190748
A    0.093102
C    0.029194
Name: proportion, dtype: float64
--------------------------------------------------
Age
21-30     0.319586
31-40     0.266746
41-50     0.160812
11-20     0.093072
61-70     0.053112
51-60     0.043436
71-80     0.037406
81-90     0.016362
0-10      0.006736
91-100    0.002732
Name: proportion, dtype: float64
--------------------------------------------------

# Function to plot a boxplot and a histogram along the same scale

def histogram_boxplot(data, feature, figsize=(12, 7), kde=False, bins=None):
    """
    Boxplot and histogram combined, stacked vertically on a shared x-axis

    data: dataframe
    feature: dataframe column (its mean and median are drawn, so it is expected to be numeric)
    figsize: size of figure (default (12,7))
    kde: whether to show the density curve (default False)
    bins: number of bins for histogram (default None, i.e. seaborn picks the binning)
    """
    # Two stacked axes: a short boxplot on top, a taller histogram below
    fig, (ax_box2, ax_hist2) = plt.subplots(
        nrows=2,
        sharex=True,  # both plots are drawn along the same x scale
        gridspec_kw={"height_ratios": (0.25, 0.75)},
        figsize=figsize,
    )

    # Boxplot; showmeans=True adds a marker at the mean of the column
    sns.boxplot(data=data, x=feature, ax=ax_box2, showmeans=True, color="violet")

    # Histogram. `bins` is only forwarded when the caller supplied one, so that
    # seaborn's own default binning is used otherwise. No `palette` is passed:
    # without a `hue` variable seaborn ignores it and emits a UserWarning.
    hist_kwargs = {"data": data, "x": feature, "kde": kde, "ax": ax_hist2}
    if bins:
        hist_kwargs["bins"] = bins
    sns.histplot(**hist_kwargs)

    # Reference lines: dashed green for the mean, solid black for the median
    ax_hist2.axvline(data[feature].mean(), color="green", linestyle="--")
    ax_hist2.axvline(data[feature].median(), color="black", linestyle="-")

# Distribution of the length of stay (30 bins, with density curve)
histogram_boxplot(data, "Stay (in days)", kde = True, bins = 30)

<ipython-input-54-52571307273e>:21: UserWarning: Ignoring `palette` because no `hue` variable has been assigned.
  sns.histplot(

# Distribution of the admission deposit (30 bins, with density curve)
histogram_boxplot(data, "Admission_Deposit", kde = True, bins = 30)

<ipython-input-54-52571307273e>:21: UserWarning: Ignoring `palette` because no `hue` variable has been assigned.
  sns.histplot(

# Distribution of the number of visitors with the patient (30 bins, with density curve)
histogram_boxplot(data, "Visitors with Patient", kde = True, bins = 30)

<ipython-input-54-52571307273e>:21: UserWarning: Ignoring `palette` because no `hue` variable has been assigned.
  sns.histplot(

# Keep only the numeric columns; correlations are computed on these
numeric_data = data.select_dtypes(include=np.number)

# Annotated heatmap of the pairwise correlations, colour scale fixed to [-1, 1]
plt.figure(figsize=(15, 7))
sns.heatmap(
    data=numeric_data.corr(),
    cmap="Spectral",
    annot=True,
    fmt=".2f",
    vmin=-1,
    vmax=1,
)
plt.show()

# Function to plot stacked bar plots

def stacked_barplot(data, predictor, target):
    """
    Print the category counts and plot a stacked bar chart

    data: dataframe
    predictor: independent variable (column name; one bar per level)
    target: target variable (column name; one stacked segment per level)
    """
    count = data[predictor].nunique()

    # value_counts() orders labels by descending frequency, so the last label is
    # the least frequent target level; both tables are sorted by that column
    sorter = data[target].value_counts().index[-1]

    # Absolute counts, including the "All" margins, printed for reference
    tab1 = pd.crosstab(data[predictor], data[target], margins=True).sort_values(
        by=sorter, ascending=False
    )
    print(tab1)
    print("-" * 120)

    # Row-normalised shares, so every bar sums to 1
    tab = pd.crosstab(data[predictor], data[target], normalize="index").sort_values(
        by=sorter, ascending=False
    )
    # Figure width grows with the number of predictor levels
    tab.plot(kind="bar", stacked=True, figsize=(count + 1, 5))

    # Single legend placed outside the axes on the right. A second legend call
    # would simply replace this one, so only one is issued.
    plt.legend(loc="upper left", bbox_to_anchor=(1, 1))
    plt.show()

# Length of stay aggregated per ward facility code (horizontal bars)
sns.barplot(data=data, x='Stay (in days)', y='Ward_Facility_Code')
plt.show()

# Department mix within each ward facility code
stacked_barplot(data, predictor="Ward_Facility_Code", target="Department")

Department          TB & Chest disease  anesthesia  gynecology  radiotherapy  \
Ward_Facility_Code                                                             
A                                 4709       15611           0         21093   
All                              22890       44179      343478         84315   
B                                    0           0      103885             0   
C                                 1319        4199           0          9079   
D                                    0           0      119055             0   
E                                16862       24369           0         54143   
F                                    0           0      120538             0   

Department          surgery     All  
Ward_Facility_Code                   
A                      5138   46551  
All                    5138  500000  
B                         0  103885  
C                         0   14597  
D                         0  119055  
E                         0   95374  
F                         0  120538  
------------------------------------------------------------------------------------------------------------------------

# Severity-of-illness mix within each ward facility code
stacked_barplot(data, "Ward_Facility_Code", "Severity of Illness")

Severity of Illness  Extreme   Minor  Moderate     All
Ward_Facility_Code                                    
All                    88266  131537    280197  500000
D                      29549   27220     62286  119055
B                      24222   23579     56084  103885
A                      13662    7877     25012   46551
E                      11488   22254     61632   95374
F                       5842   47594     67102  120538
C                       3503    3013      8081   14597
------------------------------------------------------------------------------------------------------------------------

# Length of stay aggregated per age bracket (horizontal bars)
sns.barplot(data=data, x='Stay (in days)', y='Age')
plt.show()

# For every doctor: the distinct departments they appear in and their record count
data.groupby(by=['doctor_name'])['Department'].agg(
    Department_Name='unique',
    Patients_Treated='count',
)

# One-hot encode every object/category column.
# drop_first=True removes the first level of each variable to avoid redundant columns.
# NOTE(review): health_conditions contains missing values; get_dummies creates no
# NaN indicator by default, so those rows end up with all health_conditions_*
# dummies False, the same encoding as the dropped first level. Confirm this is intended.
data = pd.get_dummies(
    data,
    columns=list(data.select_dtypes(include=["object", "category"]).columns),
    drop_first=True,
)

# Inspect the encoded frame
data

# Features: everything except the length of stay
x = data.drop(columns='Stay (in days)')

# Target: the length of stay
y = data['Stay (in days)']

# 80/20 shuffled train/test split with a fixed seed for reproducibility
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=1,
)

# Report the size of both splits
print("Shape of Training set : ", x_train.shape)
print("Shape of test set : ", x_test.shape)

Shape of Training set :  (400000, 41)
Shape of test set :  (100000, 41)

# Function to compute adjusted R-squared
def adj_r2_score(predictors, targets, predictions):
    """
    Adjusted R-squared of `predictions` against `targets`

    predictors: two-dimensional feature table; shape[0] is used as the number of
        observations and shape[1] as the number of features
    targets: observed values
    predictions: predicted values
    """
    n_obs, n_feats = predictors.shape[0], predictors.shape[1]
    plain_r2 = r2_score(targets, predictions)
    # Penalise the unexplained share (1 - R^2) for the number of features used
    unexplained = (1 - plain_r2) * (n_obs - 1) / (n_obs - n_feats - 1)
    return 1 - unexplained


# Function to compute MAPE
def mape_score(targets, predictions):
    """
    Mean absolute percentage error, in percent

    targets: observed values (array-like)
    predictions: predicted values (array-like, same length as targets)

    The absolute error is divided by the absolute value of the target, so
    negative targets contribute a positive percentage error instead of a
    negative one. Both inputs are converted to NumPy arrays and compared
    position by position. A target equal to zero makes the ratio infinite or
    NaN, as with any division by zero in NumPy.
    """
    actual = np.asarray(targets, dtype=float)
    forecast = np.asarray(predictions, dtype=float)
    return np.mean(np.abs(actual - forecast) / np.abs(actual)) * 100


# Function to compute different metrics to check performance of a regression model
def model_performance_regression(model, predictors, target):
    """
    Evaluate a fitted regressor and return its metrics as a one-row dataframe

    model: regressor (must expose predict)
    predictors: independent variables
    target: dependent variable

    The returned dataframe has a single row (index 0) with the columns
    RMSE, MAE, R-squared, Adj. R-squared and MAPE, in that order.
    """
    predicted = model.predict(predictors)

    metrics = {
        "RMSE": np.sqrt(mean_squared_error(target, predicted)),
        "MAE": mean_absolute_error(target, predicted),
        "R-squared": r2_score(target, predicted),
        "Adj. R-squared": adj_r2_score(predictors, target, predicted),
        "MAPE": mape_score(target, predicted),
    }

    return pd.DataFrame(metrics, index=[0])

# Decision Tree Regressor: fixed seed, fitted on the training split
dt_regressor = DecisionTreeRegressor(random_state=1).fit(x_train, y_train)

# Evaluate the fitted tree on the held-out test split
dt_regressor_perf_test = model_performance_regression(
    model=dt_regressor, predictors=x_test, target=y_test
)

dt_regressor_perf_test

# The importable package is `sklearn` ("Scikit-learn" is not a valid module name)
from sklearn import tree
features = list(x.columns)

# Building the model with max_depth=3 so the plotted tree stays readable
dt_regressor_visualize = DecisionTreeRegressor(random_state = 1, max_depth=3)

# Fitting the model
dt_regressor_visualize.fit(x_train, y_train)


# Graphical rendering of the shallow tree, with node ids and filled nodes
plt.figure(figsize = (20, 20))
tree.plot_tree(dt_regressor_visualize, feature_names = features, filled = True, fontsize = 12,
               node_ids = True, class_names = True)
plt.show()

# Text rendering of the same tree (split rules and leaf values)
print(tree.export_text(dt_regressor_visualize, feature_names=x_train.columns.tolist(), show_weights=True))

|--- Department_gynecology <= 0.50
|   |--- Age_31-40 <= 0.50
|   |   |--- Age_41-50 <= 0.50
|   |   |   |--- value: [26.84]
|   |   |--- Age_41-50 >  0.50
|   |   |   |--- value: [10.02]
|   |--- Age_31-40 >  0.50
|   |   |--- Department_anesthesia <= 0.50
|   |   |   |--- value: [6.94]
|   |   |--- Department_anesthesia >  0.50
|   |   |   |--- value: [17.97]
|--- Department_gynecology >  0.50
|   |--- Available Extra Rooms in Hospital <= 12.50
|   |   |--- Admission_Deposit <= 4605.06
|   |   |   |--- value: [8.69]
|   |   |--- Admission_Deposit >  4605.06
|   |   |   |--- value: [8.51]
|   |--- Available Extra Rooms in Hospital >  12.50
|   |   |--- Type of Admission_Trauma <= 0.50
|   |   |   |--- value: [10.76]
|   |   |--- Type of Admission_Trauma >  0.50
|   |   |   |--- value: [10.30]

# Bagging Regressor: default settings, fixed seed, fitted on the training split
bagging_estimator = BaggingRegressor(random_state=1).fit(x_train, y_train)

# Evaluate on the held-out test split
bagging_estimator_perf_test = model_performance_regression(
    model=bagging_estimator, predictors=x_test, target=y_test
)

bagging_estimator_perf_test

# Random Forest Regressor: 100 trees, fixed seed, fitted on the training split
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=1).fit(x_train, y_train)

# Evaluate on the held-out test split
rf_regressor_perf_test = model_performance_regression(
    model=rf_regressor, predictors=x_test, target=y_test
)

rf_regressor_perf_test

# Importing AdaBoost Regressor
# (the importable package is `sklearn`; "Scikit-learn" is not a valid module name)
from sklearn.ensemble import AdaBoostRegressor

# AdaBoost Regressor with a fixed seed
ada_regressor = AdaBoostRegressor(random_state=1)

# Fitting the model on the training split
ada_regressor.fit(x_train, y_train)

# Model Performance on the test data
ada_regressor_perf_test = model_performance_regression(ada_regressor, x_test, y_test)

ada_regressor_perf_test

# Importing Gradient Boosting Regressor
# (the importable package is `sklearn`; "Scikit-learn" is not a valid module name)
from sklearn.ensemble import GradientBoostingRegressor

# Gradient Boosting Regressor with a fixed seed
grad_regressor = GradientBoostingRegressor(random_state=1)

# Fitting the model on the training split
grad_regressor.fit(x_train, y_train)

# Model Performance on the test data
grad_regressor_perf_test = model_performance_regression(grad_regressor, x_test, y_test)

grad_regressor_perf_test

# Installing the xgboost library using the 'pip' command
# (the leading "!" is the notebook shell escape; this line is not valid in a plain .py script)
!pip install xgboost

Requirement already satisfied: xgboost in /usr/local/lib/python3.10/dist-packages (2.0.3)
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from xgboost) (1.25.2)
Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from xgboost) (1.11.4)

# XGBoost Regressor comes from the separate xgboost package
from xgboost import XGBRegressor

# Default XGBoost regressor with a fixed seed, fitted on the training split
xgb = XGBRegressor(random_state=1).fit(x_train, y_train)

# Evaluate on the held-out test split
xgb_perf_test = model_performance_regression(model=xgb, predictors=x_test, target=y_test)

xgb_perf_test

# Put the one-row metric frames side by side, one column per model
models_test_comp_df = pd.concat(
    [
        perf.T
        for perf in (
            dt_regressor_perf_test,
            bagging_estimator_perf_test,
            rf_regressor_perf_test,
            ada_regressor_perf_test,
            grad_regressor_perf_test,
            xgb_perf_test,
        )
    ],
    axis=1,
)

# Label the columns in the same order as the frames above
models_test_comp_df.columns = [
    "Decision tree regressor",
    "Bagging Regressor",
    "Random Forest regressor",
    "Ada Boost Regressor",
    "Gradient Boosting Regressor",
    "XG Boost Regressor",
]

print("Test performance comparison:")

# Display with one row per model and one column per metric
models_test_comp_df.T

Test performance comparison:

# Base estimator whose hyperparameters are tuned
rf_tuned = RandomForestRegressor(random_state = 1)

# Grid of parameters to choose from
# NOTE(review): in max_features the integer 1 means "one feature per split",
# whereas the float 1.0 would mean "all features" -- confirm which was intended.
rf_parameters = {"n_estimators": [100, 110, 120],

    "max_depth": [5, 7, None],

    "max_features": [0.8, 1]
             }

# Run the grid search (5-fold CV, maximising negative MSE)
rf_grid_obj = GridSearchCV(rf_tuned, rf_parameters, scoring = 'neg_mean_squared_error', cv = 5)

rf_grid_obj = rf_grid_obj.fit(x_train, y_train)

# Set the rf_tuned_regressor to the best combination of parameters.
# GridSearchCV refits the best estimator on the whole training set by default
# (refit=True), so best_estimator_ is already fitted and no second fit is needed.
rf_tuned_regressor = rf_grid_obj.best_estimator_

# Model Performance on the test data
rf_tuned_regressor_perf_test = model_performance_regression(rf_tuned_regressor, x_test, y_test)

rf_tuned_regressor_perf_test

# Put the one-row metric frames side by side, one column per model,
# now including the tuned random forest
models_test_comp_df = pd.concat(
    [
        perf.T
        for perf in (
            dt_regressor_perf_test,
            bagging_estimator_perf_test,
            rf_regressor_perf_test,
            ada_regressor_perf_test,
            grad_regressor_perf_test,
            xgb_perf_test,
            rf_tuned_regressor_perf_test,
        )
    ],
    axis=1,
)

# Label the columns in the same order as the frames above
models_test_comp_df.columns = [
    "Decision tree regressor",
    "Bagging Regressor",
    "Random Forest regressor",
    "Ada Boost Regressor",
    "Gradient Boosting Regressor",
    "XG Boost Regressor",
    "Random Forest Tuned Regressor",
]

print("Test performance comparison:")

# Display with one row per model and one column per metric
models_test_comp_df.T

Test performance comparison:

# Feature importances of the tuned random forest, drawn as horizontal bars
features = x.columns.tolist()

importances = rf_tuned_regressor.feature_importances_

# Ascending order, so the most important feature ends up at the top of the chart
indices = np.argsort(importances)

plt.figure(figsize=(10, 10))
plt.title('Feature Importances')
plt.barh(
    y=range(len(indices)),
    width=importances[indices],
    color='violet',
    align='center',
)
# Label each bar with the name of the feature it represents
plt.yticks(ticks=range(len(indices)), labels=[features[i] for i in indices])
plt.xlabel('Relative Importance')
plt.show()

# Convert notebook to html
!jupyter nbconvert --to html "/content/drive/My Drive/Colab Notebooks/Copy of FDS_Project_LearnerNotebook_FullCode.ipynb"

	count	mean	std	min	25%	50%	75%	max
Available Extra Rooms in Hospital	500000.0	3.638800	2.698124	0.000000	2.000000	3.000000	4.000000	24.00000
staff_available	500000.0	5.020470	3.158103	0.000000	2.000000	5.000000	8.000000	10.00000
Visitors with Patient	500000.0	3.549414	2.241054	0.000000	2.000000	3.000000	4.000000	32.00000
Admission_Deposit	500000.0	4722.315734	1047.324220	1654.005148	4071.714532	4627.003792	5091.612717	10104.72639
Stay (in days)	500000.0	12.381062	7.913174	3.000000	8.000000	9.000000	11.000000	51.00000

	Department_Name	Patients_Treated
doctor_name
Dr Isaac	[surgery]	3359
Dr John	[TB & Chest disease, anesthesia, radiotherapy]	51263
Dr Mark	[anesthesia, TB & Chest disease]	44410
Dr Nathan	[gynecology]	70777
Dr Olivia	[gynecology]	98352
Dr Sam	[radiotherapy]	55711
Dr Sarah	[gynecology]	99596
Dr Simon	[surgery]	1779
Dr Sophia	[gynecology]	74753

	RMSE	MAE	R-squared	Adj. R-squared	MAPE
Decision tree regressor	1.821321	1.131270	0.947324	0.947302	9.353216
Bagging Regressor	1.364505	0.902326	0.970434	0.970422	7.627444
Random Forest regressor	1.302336	0.863677	0.973067	0.973056	7.306138
Ada Boost Regressor	2.375388	1.586890	0.910399	0.910363	13.623722
Gradient Boosting Regressor	1.792721	1.212749	0.948965	0.948944	10.247284
XG Boost Regressor	1.513463	1.034136	0.963626	0.963612	8.868662

	RMSE	MAE	R-squared	Adj. R-squared	MAPE
Decision tree regressor	1.821321	1.131270	0.947324	0.947302	9.353216
Bagging Regressor	1.364505	0.902326	0.970434	0.970422	7.627444
Random Forest regressor	1.302336	0.863677	0.973067	0.973056	7.306138
Ada Boost Regressor	2.375388	1.586890	0.910399	0.910363	13.623722
Gradient Boosting Regressor	1.792721	1.212749	0.948965	0.948944	10.247284
XG Boost Regressor	1.513463	1.034136	0.963626	0.963612	8.868662
Random Forest Tuned Regressor	1.294712	0.859110	0.973381	0.973370	7.268059

Hospital Length of Stay (LOS) Prediction

Context:

Objective:

Data Dictionary:

Approach to solve the problem:

Importing Libraries

Data Overview

Exploratory Data Analysis (EDA)

Univariate Analysis

Length of stay

Admission Deposit

Visitors with Patients

Bivariate Analysis

Data Preparation for Model Building

Model Building

Decision Trees

Bagging Regressor

Random Forest Regressor

AdaBoost

Gradient Boosting Regressor

XGBoost Regressor

Models' Performance Comparison

Choosing the Models for Tuning Hyperparameters

Tuning the Model

Tuned Random Forest Regressor

Choosing the Final Model

Visualizing the Feature Importance

Business Insights and Recommendations

Next Steps

	Available Extra Rooms in Hospital	Department	Ward_Facility_Code	doctor_name	staff_available	patientid	Age	gender	Type of Admission	Severity of Illness	health_conditions	Visitors with Patient	Insurance	Admission_Deposit	Stay (in days)
0	4	gynecology	D	Dr Sophia	0	33070	41-50	Female	Trauma	Extreme	Diabetes	4	Yes	2966.408696	8
1	4	gynecology	B	Dr Sophia	2	34808	31-40	Female	Trauma	Minor	Heart disease	2	No	3554.835677	9
2	2	gynecology	B	Dr Sophia	8	44577	21-30	Female	Trauma	Extreme	Diabetes	2	Yes	5624.733654	7
3	4	gynecology	D	Dr Olivia	7	3695	31-40	Female	Urgent	Moderate	NaN	4	No	4814.149231	8
4	2	anesthesia	E	Dr Mark	10	108956	71-80	Male	Trauma	Moderate	Diabetes	2	No	5169.269637	34

	Available Extra Rooms in Hospital	Department	Ward_Facility_Code	doctor_name	staff_available	patientid	Age	gender	Type of Admission	Severity of Illness	health_conditions	Visitors with Patient	Insurance	Admission_Deposit	Stay (in days)
499995	4	gynecology	F	Dr Sarah	2	43001	11-20	Female	Trauma	Minor	High Blood Pressure	3	No	4105.795901	10
499996	13	gynecology	F	Dr Olivia	8	85601	31-40	Female	Emergency	Moderate	Other	2	No	4631.550257	11
499997	2	gynecology	B	Dr Sarah	3	22447	11-20	Female	Emergency	Moderate	High Blood Pressure	2	No	5456.930075	8
499998	2	radiotherapy	A	Dr John	1	29957	61-70	Female	Trauma	Extreme	Diabetes	2	No	4694.127772	23
499999	3	gynecology	F	Dr Sophia	3	45008	41-50	Female	Trauma	Moderate	Heart disease	4	Yes	4713.868519	10

	Available Extra Rooms in Hospital	staff_available	Visitors with Patient	Admission_Deposit	Stay (in days)	Department_anesthesia	Department_gynecology	Department_radiotherapy	Department_surgery	Ward_Facility_Code_B	Ward_Facility_Code_C	Ward_Facility_Code_D	Ward_Facility_Code_E	Ward_Facility_Code_F	doctor_name_Dr John	doctor_name_Dr Mark	doctor_name_Dr Nathan	doctor_name_Dr Olivia	doctor_name_Dr Sam	doctor_name_Dr Sarah	doctor_name_Dr Simon	doctor_name_Dr Sophia	Age_11-20	Age_21-30	Age_31-40	Age_41-50	Age_51-60	Age_61-70	Age_71-80	Age_81-90	Age_91-100	gender_Male	gender_Other	Type of Admission_Trauma	Type of Admission_Urgent	Severity of Illness_Minor	Severity of Illness_Moderate	health_conditions_Diabetes	health_conditions_Heart disease	health_conditions_High Blood Pressure	health_conditions_Other	Insurance_Yes
0	4	0	4	2966.408696	8	False	True	False	False	False	False	True	False	False	False	False	False	False	False	False	False	True	False	False	False	True	False	False	False	False	False	False	False	True	False	False	False	True	False	False	False	True
1	4	2	2	3554.835677	9	False	True	False	False	True	False	False	False	False	False	False	False	False	False	False	False	True	False	False	True	False	False	False	False	False	False	False	False	True	False	True	False	False	True	False	False	False
2	2	8	2	5624.733654	7	False	True	False	False	True	False	False	False	False	False	False	False	False	False	False	False	True	False	True	False	False	False	False	False	False	False	False	False	True	False	False	False	True	False	False	False	True
3	4	7	4	4814.149231	8	False	True	False	False	False	False	True	False	False	False	False	False	True	False	False	False	False	False	False	True	False	False	False	False	False	False	False	False	False	True	False	True	False	False	False	False	False
4	2	10	2	5169.269637	34	True	False	False	False	False	False	False	True	False	False	True	False	False	False	False	False	False	False	False	False	False	False	False	True	False	False	True	False	True	False	False	True	True	False	False	False	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
499995	4	2	3	4105.795901	10	False	True	False	False	False	False	False	False	True	False	False	False	False	False	True	False	False	True	False	False	False	False	False	False	False	False	False	False	True	False	True	False	False	False	True	False	False
499996	13	8	2	4631.550257	11	False	True	False	False	False	False	False	False	True	False	False	False	True	False	False	False	False	False	False	True	False	False	False	False	False	False	False	False	False	False	False	True	False	False	False	True	False
499997	2	3	2	5456.930075	8	False	True	False	False	True	False	False	False	False	False	False	False	False	False	True	False	False	True	False	False	False	False	False	False	False	False	False	False	False	False	False	True	False	False	True	False	False
499998	2	1	2	4694.127772	23	False	False	True	False	False	False	False	False	False	True	False	False	False	False	False	False	False	False	False	False	False	False	True	False	False	False	False	False	True	False	False	False	True	False	False	False	False
499999	3	3	4	4713.868519	10	False	True	False	False	False	False	False	False	True	False	False	False	False	False	False	False	True	False	False	False	True	False	False	False	False	False	False	False	True	False	False	True	False	True	False	False	True