"""A simple program that calculates the covariance between two variates (X and Y) and Karl Pearson's coefficient of correlation, then plots the scatter diagram together with the regression line of Y on X."""
import matplotlib.pyplot as plt
import numpy as np
import math
class RegressionLine:
    """Interactive covariance / Karl Pearson correlation calculator.

    Typical call order (each step relies on state stored by the previous one):

    1. ``accept()``               - read N, the (x, y) pairs and the assumed
                                    means A and B from standard input.
    2. ``calculateCovariance()``  - build u = x - A, v = y - B, their sums and
                                    Cov(X, Y), and print the working table.
    3. ``calculateKarlPearson()`` - compute the standard deviations and
                                    r = Cov(X, Y) / (sigmaX * sigmaY).
    4. ``plotScatterDiagram()``   - scatter plot plus regression line of y on x.

    Note that ``_sumX``, ``_sumY`` and ``_sumXY`` hold sums of the *shifted*
    values (u, v and u*v), not of the raw x and y values.
    """

    def __init__(self) -> None:
        self._xValues = []   # raw x observations
        self._uValues = []   # u = x - A
        self._yValues = []   # raw y observations
        self._vValues = []   # v = y - B
        self._uvValues = []  # u * v
        self._coVarianceXY = 0.0
        self._karlPCoefficient = 0.0
        self._sigmaX = 0.0  # standard deviation for X
        self._sigmaY = 0.0  # standard deviation for Y
        self._N = 0
        self._A = 0  # assumed mean for X
        self._B = 0  # assumed mean for Y
        self._maxX = float('-inf')
        self._minX = float('inf')
        self._sumX = 0   # sum of u values
        self._sumY = 0   # sum of v values
        self._sumXY = 0  # sum of u*v values
        self._bYX = 0    # regression coefficient of y on x
        self._bXY = 0    # regression coefficient of x on y

    def parityString(self, string: str) -> str:
        """Return ``string`` right-padded with spaces to at least 9 characters.

        Strings that are already 9 characters or longer are returned unchanged.
        """
        return string.ljust(9)

    def accept(self) -> None:
        """Read the data set and the assumed means from standard input.

        Any previously entered observations are discarded first, so the
        stored lists always contain exactly ``_N`` pairs afterwards.

        Raises:
            ValueError: if an entry cannot be parsed as a number, or if the
                number of values is smaller than 1 (every later computation
                divides by N).
        """
        self._N = int(input("Enter the total number of values:"))
        if self._N < 1:
            raise ValueError("The number of values must be at least 1.")

        # Start from a clean slate so a second call does not mix data sets.
        self._xValues = []
        self._yValues = []

        print("Enter the data for x and y:")
        for position in range(1, self._N + 1):
            print("Enter the value at position ", position)
            self._xValues.append(float(input("Enter a x value: ")))
            self._yValues.append(float(input("Enter a y value: ")))

        print("Enter 0 is you don't want a change of scale:")
        self._A = float(input("Enter the assumed mean for x-data set:"))
        self._B = float(input("Enter the assumed mean for y-data set:"))

    def calculateCovariance(self) -> None:
        """Compute Cov(X, Y) from the shifted values and print the table.

        Uses the shortcut formula on u = x - A and v = y - B:
        ``Cov = (1/N) * (sum(uv) - (1/N) * sum(u) * sum(v))``.
        Also records the smallest and largest x value, which bound the
        regression line when it is plotted.

        All derived state is rebuilt from scratch on every call, so calling
        this method more than once does not double-count the data.

        Raises:
            ZeroDivisionError: if ``_N`` is 0.
            IndexError: if fewer than ``_N`` x or y values are stored.
        """
        n = self._N

        # Reset everything this method derives before recomputing it.
        self._uValues = []
        self._vValues = []
        self._uvValues = []
        self._maxX = float('-inf')
        self._minX = float('inf')

        for i in range(n):
            x_i = self._xValues[i]
            y_i = self._yValues[i]
            self._maxX = max(self._maxX, x_i)
            self._minX = min(self._minX, x_i)
            u_i = x_i - self._A
            v_i = y_i - self._B
            self._uValues.append(u_i)
            self._vValues.append(v_i)
            self._uvValues.append(u_i * v_i)

        self._sumX = sum(self._uValues)
        self._sumY = sum(self._vValues)
        self._sumXY = sum(self._uvValues)

        self._coVarianceXY = (1 / n) * (
            self._sumXY - (1 / n) * self._sumX * self._sumY)

        self._printCovarianceTable()

    def _printCovarianceTable(self) -> None:
        """Print the working table, the column totals and Cov(X, Y)."""
        pad = self.parityString
        totals = (str(self._sumX), str(self._sumY), str(self._sumXY))

        if self._A == 0 and self._B == 0:
            # No change of origin: u == x and v == y, so show X, Y, XY only.
            print(pad("X"), pad("Y"), pad("XY"))
            for x_i, y_i, uv_i in zip(self._xValues, self._yValues,
                                      self._uvValues):
                print(pad(str(x_i)), pad(str(y_i)), pad(str(uv_i)))
            rule = "-------------------------------------"
            print(rule)
            print(pad(totals[0]), pad(totals[1]), pad(totals[2]))
            print(rule)
        else:
            print(pad("X"), pad("u"), pad("Y"), pad("v"), pad("uv"))
            for x_i, u_i, y_i, v_i, uv_i in zip(
                    self._xValues, self._uValues, self._yValues,
                    self._vValues, self._uvValues):
                print(pad(str(x_i)), pad(str(u_i)), pad(str(y_i)),
                      pad(str(v_i)), pad(str(uv_i)))
            rule = "-----------------------------------------------"
            print(rule)
            # Totals are shown only under the u, v and uv columns.
            print(pad(" "), pad(totals[0]), pad(" "), pad(totals[1]),
                  pad(totals[2]))
            print(rule)

        print("Cov(X,Y)=", self._coVarianceXY)

    def calculateKarlPearson(self) -> None:
        """Compute and print Karl Pearson's coefficient of correlation.

        The standard deviations are taken from the shifted values u and v;
        a change of origin does not affect the deviations from the mean.
        Requires ``calculateCovariance()`` to have been called first.

        Raises:
            ZeroDivisionError: if either standard deviation is zero, in
                which case r is undefined.
        """
        self._sigmaX = self.calculateSD(self._uValues)
        self._sigmaY = self.calculateSD(self._vValues)

        denominator = self._sigmaX * self._sigmaY
        if denominator == 0:
            raise ZeroDivisionError(
                "correlation coefficient is undefined: a variate has zero "
                "standard deviation")

        self._karlPCoefficient = self._coVarianceXY / denominator
        print("r =", round(self._karlPCoefficient, 3))

    def calculateSD(self, _valuesList) -> float:
        """Return the population standard deviation of ``_valuesList``.

        The divisor is the length of the list itself (population formula,
        i.e. N rather than N - 1).

        Raises:
            ZeroDivisionError: if ``_valuesList`` is empty.
        """
        count = len(_valuesList)
        mean = sum(_valuesList) / count
        variance = sum((value - mean) ** 2 for value in _valuesList) / count
        return math.sqrt(variance)

    def _regressionYOnX(self):
        """Return ``(slope, intercept)`` of the regression line of y on x.

        Stores both regression coefficients (``_bYX`` and ``_bXY``) on the
        instance. The line passes through the point of means, so the
        intercept is ``mean_y - bYX * mean_x``.

        Raises:
            ZeroDivisionError: if ``_N`` or either standard deviation is 0.
        """
        self._bYX = (self._karlPCoefficient * self._sigmaY) / self._sigmaX
        self._bXY = (self._karlPCoefficient * self._sigmaX) / self._sigmaY

        # _sumX / _sumY are sums of the shifted values u = x - A, v = y - B,
        # so the assumed means must be added back to get the true means.
        mean_x = self._A + self._sumX / self._N
        mean_y = self._B + self._sumY / self._N

        return self._bYX, mean_y - self._bYX * mean_x

    def plotScatterDiagram(self) -> None:
        """Show the scatter diagram with the regression line of y on x.

        The line is drawn over 100 evenly spaced points between the smallest
        and largest x value recorded by ``calculateCovariance()``. Requires
        ``calculateCovariance()`` and ``calculateKarlPearson()`` to have been
        called first.
        """
        plt.scatter(np.array(self._xValues), np.array(self._yValues))

        # calculating regression of y on x
        slope, intercept = self._regressionYOnX()
        line_x = np.linspace(self._minX, self._maxX, 100)
        line_y = slope * line_x + intercept

        plt.title('Regression line of y on x')
        plt.plot(line_x, line_y, '-r', label='regression line of y on x')
        plt.xlabel('x', color='#1C2833')
        plt.ylabel('y', color='#1C2833')
        plt.show()
# Run the whole workflow in order: read the data, compute the covariance,
# compute Karl Pearson's coefficient, then show the plot.
obj = RegressionLine()
for _stage in (
    obj.accept,
    obj.calculateCovariance,
    obj.calculateKarlPearson,
    obj.plotScatterDiagram,
):
    _stage()
No comments:
Post a Comment