# import the important packages
import pandas as pd  # library used for data manipulation and analysis
import numpy as np  # library used for working with arrays
import matplotlib.pyplot as plt  # library for visualization
import seaborn as sns  # library for visualization
%matplotlib inline

import scipy.stats as stats  # this library contains a large number of probability distributions as well as a growing library of statistical functions

# Mount Google Drive at /content/drive so that files stored on Drive become
# reachable from this runtime. The google.colab module is supplied by the
# Colab environment, so this cell is expected to run only there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

# Load the sample into a DataFrame and preview its first rows.
# NOTE(review): the path is relative to the current working directory, not to
# the /content/drive mount point — confirm the CSV is actually located there.
mydata = pd.read_csv('InternetMobileTime.csv')
mydata.head()

# (rows, columns) of the loaded sample
mydata.shape

(30, 1)

# Report the number of observations (rows) in the loaded sample.
print("The sample size for this problem is", mydata.shape[0])

The sample size for this problem is 30

# Sample mean of the "Minutes" column (pandas skips missing values here).
sample_mean = mydata["Minutes"].mean()

# calculating z-stat

# Sample size is derived from the data rather than hard-coded, so it cannot go
# stale if the CSV changes. count() ignores missing values, matching .mean().
n = mydata["Minutes"].count()
mu = 144  # hypothesized mean under the null hypothesis
sigma = 110  # value used as the population standard deviation in the z formula

# z = (sample mean - mu) / (sigma / sqrt(n))
test_stat = (sample_mean - mu) / (sigma / np.sqrt(n))

test_stat

1.8157832663959144

from scipy.stats import norm

# Upper-tail (one-tailed) p-value, P(Z >= test_stat).
# norm.sf is the survival function, i.e. 1 - cdf.
p_value1 = norm.sf(test_stat)

# Two-tailed p-value: twice the tail area beyond |test_stat|.
# Taking the absolute value keeps the result within [0, 1] when the statistic
# is negative; simply doubling the upper-tail value would exceed 1 in that case.
p_value_ztest = 2 * norm.sf(abs(test_stat))

print('The p-value is: {0} '.format(p_value_ztest))

The p-value is: 0.06940362517785204

alpha_value = 0.05  # level of significance

print('Level of significance: %.2f' % alpha_value)

# Decision rule for the z-test: reject the null hypothesis only when the
# p-value is below the significance level. The messages are fixed strings with
# no replacement fields, so they are printed directly.
if p_value_ztest < alpha_value:
    print('We have evidence to reject the null hypothesis since the p-value is less than the level of significance')
else:
    print('We have no evidence to reject the null hypothesis since the p-value is greater than the level of significance')

Level of significance: 0.05
We have no evidence to reject the null hypothesis since the p-value is greater than the level of significance

# One-sample t-test of the "Minutes" column against a hypothesized mean of 144.
# The single column (a Series) is passed instead of the whole DataFrame so that
# scipy returns scalar results rather than one-element arrays.
t_statistic, p_value_ttest = stats.ttest_1samp(mydata["Minutes"], popmean=144)
print('One sample t test \nt statistic: {0} p value: {1} '.format(t_statistic, p_value_ttest))

One sample t test 
t statistic: [1.41131966] p value: [0.16878961]

alpha_value = 0.05  # level of significance

print('Level of significance: %.2f' % alpha_value)

# Decision rule for the t-test: reject the null hypothesis only when the
# p-value is below the significance level. The messages are fixed strings with
# no replacement fields, so they are printed directly.
if p_value_ttest < alpha_value:
    print('We have evidence to reject the null hypothesis since the p-value is less than the level of significance')
else:
    print('We have no evidence to reject the null hypothesis since the p-value is greater than the level of significance')

Level of significance: 0.05
We have no evidence to reject the null hypothesis since the p-value is greater than the level of significance

# Convert notebook to html
# The leading "!" is an IPython shell escape: this runs the jupyter nbconvert
# command-line tool on the notebook file at the given Drive path.
!jupyter nbconvert --to html "/content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week Two - Statistics for Data Science/Mentored Session/Notebook - Mobile Internet Usage Analysis.ipynb"

[NbConvertApp] Converting notebook /content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week Two - Statistics for Data Science/Mentored Session/Notebook - Mobile Internet Usage Analysis.ipynb to html
[NbConvertApp] Writing 299534 bytes to /content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week Two - Statistics for Data Science/Mentored Session/Notebook - Mobile Internet Usage Analysis.html

Context:¶

Key Question:¶

Importing necessary libraries¶

Importing the Data¶

Step 1: Define null and alternate hypotheses¶

The null hypothesis states that the mean Internet usage time, $\mu$, is equal to 144: $H_0: \mu = 144$.

The alternative hypothesis states that the mean Internet usage time, $\mu$, is not equal to 144: $H_a: \mu \neq 144$.

Step 2: Decide the significance level¶

Step 3: Identify the test statistic¶

Step 4: Calculate the p-value using z-statistic¶

Step 5: Decide to reject or not to reject the null hypothesis based on the z-statistic¶

Step 6: Calculate the p-value using t-statistic¶

Step 7: Decide to reject or not to reject the null hypothesis based on t-statistic¶

	Minutes
0	72
1	144
2	48
3	72
4	36