Concepts Covered:¶
# Connect to Google
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
Command to install a specific version of SciPy¶
!pip install scipy==1.6.1
import scipy
scipy.__version__
'1.8.0'
Import the required packages¶
# import the important packages
import pandas as pd # library used for data manipulation and analysis
import numpy as np # library used for working with arrays
import matplotlib.pyplot as plt # library for plots and visualizations
import seaborn as sns # library for visualizations
%matplotlib inline
import scipy.stats as stats # this library contains a large number of probability distributions as well as a growing library of statistical functions
Hypothesis Test for Population Mean¶
One Sample Z-test (when population standard deviation is known)¶
It is rare to know the population standard deviation but not the population mean, but let's assume that is the case here.
It is known from experience that for a certain E-commerce company the mean delivery time of the products is 5 days with a standard deviation of 1.3 days.
The new customer service manager of the company is afraid that the company is slipping and collects a random sample of 45 orders. The mean delivery time of this sample comes out to be 5.25 days.
Is there enough statistical evidence for the manager’s apprehension that the mean delivery time of products is greater than 5 days?
Use level of significance $\alpha$ = 0.05.
Let's write the null and alternative hypotheses¶
Let $\mu$ be the mean delivery time of the products.
The manager will test the null hypothesis
$H_0: \mu = 5$
against the alternative hypothesis
$H_a: \mu > 5$
Are the assumptions of the Z-test satisfied?¶
- Samples are drawn from a normal distribution - Since the sample size is 45 (which is > 30), the Central Limit Theorem ensures that the distribution of the sample mean is approximately normal. If the sample size were less than 30, we could apply the Z-test only if we knew that the population distribution was normal.
- Observations are from a simple random sample - We are informed that the manager collected a random sample of 45 orders.
- Standard deviation is known - Yes.
Voila! We can use the Z-test for this problem.
The next step is to find the Z test statistic¶
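The one-sample Z test statistic is

$Z = \dfrac{\bar{x} - \mu}{\sigma/\sqrt{n}}$

where $\bar{x}$ is the sample mean, $\mu$ is the population mean under the null hypothesis, $\sigma$ is the population standard deviation, and $n$ is the sample size. Here $\bar{x} = 5.25$, $\mu = 5$, $\sigma = 1.3$, and $n = 45$.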
# set the values of population mean and population standard deviation to 5 and 1.3 respectively
mu, sigma = 5, 1.3
# set the value of sample mean to 5.25
x_bar = 5.25
# calculate the test statistic
test_stat = (x_bar - mu) / (sigma/np.sqrt(45))
test_stat
1.2900392177883402
Under the null hypothesis, the Z test statistic follows a standard normal distribution.¶
Let's plot the distribution of the Z test statistic and see where the computed test statistic lies in the plot.
# import the required function
from scipy.stats import norm
# plotting the distribution of Z test statistic along with the test statistic
# we are plotting the distributions here to better visualize the calculations
x = np.linspace(-4, 4, 100)
plt.plot(x, norm.pdf(x, 0, 1))
plt.axvline(x = test_stat, c = 'r')
plt.show()
What is the probability of observing a test statistic value as large as the one calculated, or larger, under the above distribution?
1 - norm.cdf(test_stat)
0.09851852092578695
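As a quick cross-check, SciPy's survival function norm.sf returns the same right-tail probability directly.
# the survival function (1 - cdf) gives the same right-tail probability
norm.sf(test_stat)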
Though the probability is fairly small, is it small enough to reject the null hypothesis in favor of the alternative hypothesis?
Introduction to the Rejection Region and p-value Approaches¶
Though the probability is small, we cannot conclude whether the evidence is strong enough to reject the null hypothesis in favor of the alternative hypothesis. To determine this, we use one of the following approaches:
1. Rejection region approach
2. p-value approach
Rejection Region Approach¶
For this approach, we need to follow the below steps.
We choose a value of the level of significance ($\alpha$).
($\alpha$ is the probability of rejecting the null hypothesis if it is true.)
Then, we find the rejection region in the graph.
We reject the null hypothesis if the test statistic falls in the rejection region. Else, we don't reject the null hypothesis.
In the given example, the Z test statistic follows a standard normal distribution, as shown in the above plot. Z values lying in the right tail of the distribution give strong evidence against the null hypothesis. To find the rejection region, we find the value of Z (called the critical value) that leaves an area of $\alpha$ in the right tail.
# import the required function
from scipy.stats import norm
# find the critical value
critical_val = norm.ppf(1-.05)
critical_val
1.6448536269514722
The critical value separates the region where we will reject the null hypothesis from the region where we won't reject the null hypothesis. See the plot below.
# plotting the test statistic distribution and indicating the rejection and acceptance region
# we are plotting the distributions here to better visualize the calculations
x = np.linspace(-4, 4, 100)
plt.plot(x, norm.pdf(x, 0, 1))
plt.axvline(x = critical_val, c = 'r')
x1 = np.linspace(critical_val, 4, 50)
plt.fill_between(x1, norm.pdf(x1, 0, 1), color='r')
plt.annotate('Reject Null', (2, 0.20))
plt.annotate(' Do Not Reject\n Null', (-1, 0.20))
plt.show()
Insight¶
As our test statistic (~1.29) does not lie in the rejection region, we cannot reject the null hypothesis. Thus, we do not have enough statistical evidence to say that the mean delivery time of products is greater than 5 days.
p-value Approach¶
Though the rejection region approach gives us the desired conclusion, it does not say anything about the strength of the evidence. Hence, statisticians prefer the p-value approach which measures the strength of the evidence against the null hypothesis.
For this approach, we need to follow the below steps.
We choose the level of significance ($\alpha$).
Then, we calculate the p-value.
We reject the null hypothesis if the p-value $\leq \alpha$. Else, we fail to reject the null hypothesis.
The p-value is the probability of observing the computed test statistic or a more extreme value, assuming the null hypothesis is true.
In the given example, the p-value is the area to the right of the test statistic under the standard normal curve.
# plotting the test statistic distribution along with computed test statistic
# we are plotting the distributions here to better visualize the calculations
x = np.linspace(-4, 4, 100)
plt.plot(x, norm.pdf(x, 0, 1))
plt.axvline(x = test_stat, c = 'r')
x1 = np.linspace(test_stat, 4, 50)
plt.fill_between(x1, norm.pdf(x1, 0, 1), color='r')
plt.show()
# calculate the p-value
1 - norm.cdf(test_stat)
0.09851852092578695
Insight¶
As the p-value (~0.098) is greater than the level of significance (0.05), we cannot reject the null hypothesis. Thus, we do not have enough statistical evidence to say that the mean delivery time of products is greater than 5 days.
Key Takeaway¶
- Both the rejection region approach and the p-value approach lead to the same conclusion: the manager does not have enough statistical evidence to say that the mean delivery time of products is greater than 5 days.
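To tie the steps together, here is a minimal sketch of a reusable helper that performs a one-sample Z-test from summary statistics. The function name one_sample_ztest and its arguments are our own for illustration; they are not part of SciPy.
# illustrative helper: one-sample Z-test from summary statistics
from scipy.stats import norm
import numpy as np

def one_sample_ztest(x_bar, mu, sigma, n, alternative='two-sided'):
    # compute the Z test statistic
    test_stat = (x_bar - mu) / (sigma / np.sqrt(n))
    # compute the p-value according to the alternative hypothesis
    if alternative == 'larger':      # H_a: mu > hypothesized value
        p_value = 1 - norm.cdf(test_stat)
    elif alternative == 'smaller':   # H_a: mu < hypothesized value
        p_value = norm.cdf(test_stat)
    else:                            # H_a: mu != hypothesized value (two-sided)
        p_value = 2 * (1 - norm.cdf(abs(test_stat)))
    return test_stat, p_value

# reproduce the delivery-time example: Z ~ 1.29, p-value ~ 0.098
one_sample_ztest(x_bar=5.25, mu=5, sigma=1.3, n=45, alternative='larger')
With $\alpha$ = 0.05, the returned p-value (~0.098) again leads to not rejecting the null hypothesis.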
One-tailed and Two-tailed Tests¶
Let's see an example¶
Suppose a soft-drink manufacturer's best-selling product is a bottle of coke that is supposed to contain 600 ml, with a standard deviation of 50 ml.
A customer would like to test whether the bottle contains at least 600 ml of coke. He suspects that the amount of coke in the bottle is less than 600 ml.
The null hypothesis formed by the customer is
$H_0: \mu = 600$
against the alternative hypothesis
$H_a: \mu < 600$
However, the quality control team wants exactly 600 ml of coke in the bottle. The team wants to ensure that the amount of coke in the bottle is not different from 600 ml.
The null hypothesis formed by the quality control team is
$H_0: \mu = 600$
against the alternative hypothesis
$H_a: \mu \neq 600$
Thus, the choice of a one-sided vs two-sided alternative hypothesis depends on the nature of the problem.
Two-tailed Test Example¶
Suppose you work on the quality control team of the company. It is known from experience that the mean amount of coke in a bottle is 600 ml with a standard deviation of 50 ml.
You have collected 36 randomly sampled bottles. The mean amount of coke in the 36 samples is 580 ml.
You intend to test whether the amount of coke in the bottle is different from 600 ml using a 0.05 level of significance. Do you have enough statistical evidence for it?
Are the assumptions of Z-test satisfied?¶
- Samples are drawn from a normal distribution - Since the sample size is 36 (which is > 30), the Central Limit Theorem ensures that the distribution of the sample mean is approximately normal. If the sample size were less than 30, we could apply the Z-test only if we knew that the population distribution was normal.
- Observations are from a simple random sample - We are informed that you have collected a random sample of bottles.
- Standard deviation is known - Yes.
Voila! We can use the Z-test for this problem.
The next step is to find the test statistic¶
# set the values of population mean and standard deviation to 600 and 50 respectively.
mu, sigma = 600, 50
# set the value of sample mean to 580.
x_bar = 580
# calculate the test statistic.
test_stat = (x_bar - mu) / (sigma/np.sqrt(36))
test_stat
-2.4
Let's use the rejection region approach for the two-tailed test¶
In the given example, the Z test statistic follows a standard normal distribution. Z values lying in both the left and right tails of the distribution give strong evidence against the null hypothesis. To find the rejection region, we find the values of Z (called critical values) that leave an area of $\alpha/2$ in each of the left and right tails.
# import the required function
from scipy.stats import norm
# find the critical values
critical_val1 = norm.ppf(1-(0.05/2))
critical_val2 = norm.ppf(0.05/2)
# plot the rejection and acceptance region
# we are plotting the distributions here to better visualize the calculations
x = np.linspace(-4, 4, 100)
plt.plot(x, norm.pdf(x, 0, 1))
plt.axvline(x = critical_val1, c = 'r')
x1 = np.linspace(critical_val1, 4, 50)
plt.fill_between(x1, norm.pdf(x1, 0, 1), color='r')
plt.axvline(x = critical_val2, c = 'r')
x1 = np.linspace(-4, critical_val2, 50)
plt.fill_between(x1, norm.pdf(x1, 0, 1), color='r')
plt.annotate('Reject Null', (2.2, 0.20))
plt.annotate('Reject Null', (-3.5, 0.20))
plt.annotate(' Do Not Reject\n Null', (-1, 0.20))
plt.show()
As our test statistic (~-2.4) lies in the rejection region, we can reject the null hypothesis. Thus, we have enough statistical evidence to say that the amount of coke in the bottle is different from 600 ml.
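As a cross-check, we can also compute the two-tailed p-value for this test statistic; since the p-value (roughly 0.016) is less than 0.05, the p-value approach leads to the same conclusion.
# two-tailed p-value: area in both tails beyond |test_stat|
2 * (1 - norm.cdf(abs(test_stat)))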
One-tailed Test Example¶
It is known that the mean amount of coke in a bottle is 600 ml with a standard deviation of 50 ml. Suppose you are a customer who wants to test whether the amount of coke in the bottle is less than 600 ml.
You have collected 36 randomly sampled bottles. The mean amount of coke in the 36 samples is 580 ml.
Do you have enough statistical evidence for it? Use a 0.05 level of significance.
Are the assumptions of the Z-test satisfied?¶
- Samples are drawn from a normal distribution - Since the sample size is 36 (which is > 30), the Central Limit Theorem ensures that the distribution of the sample mean is approximately normal. If the sample size were less than 30, we could apply the Z-test only if we knew that the population distribution was normal.
- Observations are from a simple random sample - We are informed that you have collected a random sample of bottles.
- Standard deviation is known - Yes.
Voila! We can use the Z-test for this problem.
The next step is to find the test statistic¶
# set the values of population mean and standard deviation to 600 and 50 respectively
mu, sigma = 600, 50
# set the value of sample mean to 580
x_bar = 580
# calculate the test statistic
test_stat = (x_bar - mu) / (sigma/np.sqrt(36))
test_stat
-2.4
Let's use the rejection region approach for the one-tailed test¶
In the given example, the Z test statistic follows a standard normal distribution. Z values lying in the left tail of the distribution give strong evidence against the null hypothesis. To find the rejection region, we find the value of Z (called the critical value) that leaves an area of $\alpha$ in the left tail.
# import the required function
from scipy.stats import norm
# find the critical value
critical_val = norm.ppf(0.05)
# plot the rejection and acceptance region
# we are plotting the distributions here to better visualize the calculations
x = np.linspace(-4, 4, 100)
plt.plot(x, norm.pdf(x, 0, 1))
plt.axvline(x = critical_val, c = 'r')
x1 = np.linspace(-4, critical_val, 50)
plt.fill_between(x1, norm.pdf(x1, 0, 1), color='r')
plt.annotate('Reject Null', (-3.5, 0.20))
plt.annotate(' Do Not Reject\n Null', (-1, 0.20))
plt.show()
As our test statistic (~-2.4) lies in the rejection region, we can reject the null hypothesis. Thus, we have enough statistical evidence to say that the amount of coke in the bottle is less than 600 ml.
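Here too, the p-value approach agrees: the left-tail p-value (roughly 0.008) is less than 0.05, so we reject the null hypothesis.
# one-tailed (left-tail) p-value: area to the left of the test statistic
norm.cdf(test_stat)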
Key Takeaways¶
- In the one-tailed test, we look for a difference in only one direction, whereas in the two-tailed test, we look for a difference in either direction, without regard to which direction it is.
- Choose one depending on the nature of the problem!
# Convert notebook to html
!jupyter nbconvert --to html "/content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week Two - Statistics for Data Science/Notebook_Hypothesis_Testing.ipynb"