The Programming Project: Python
Showing posts with label Python. Show all posts
Showing posts with label Python. Show all posts

Saturday, January 21, 2023

Karl Pearson Coefficient of Correlation and Scatter diagram Regression Line

 A simple program to calculate the covariance between two variates (X and Y) and the Karl Pearson coefficient of correlation. The scatter diagram is plotted along with the regression line of Y on X.




import matplotlib.pyplot as plt
import numpy as np
import math


class RegressionLine:
    """Covariance, Karl Pearson correlation and regression line of y on x.

    Typical usage is ``accept()`` -> ``calculateCovariance()`` ->
    ``calculateKarlPearson()`` -> ``plotScatterDiagram()``; every step reads
    the state that the previous steps stored on the instance.

    The data may be shifted by assumed means ``A`` (for x) and ``B`` (for y).
    The working values are ``u = x - A`` and ``v = y - B``, and ``_sumX``,
    ``_sumY`` and ``_sumXY`` hold the sums of ``u``, ``v`` and ``u * v``
    (not of the raw data).
    """

    def __init__(self) -> None:
        """Create an empty calculator with zeroed statistics."""
        self._xValues = []
        self._uValues = []
        self._yValues = []
        self._vValues = []
        self._uvValues = []
        self._coVarianceXY = 0.0
        self._karlPCoefficient = 0.0
        self._sigmaX = 0.0  # standard deviation for X
        self._sigmaY = 0.0  # standard deviation for Y
        self._N = 0
        self._A = 0  # assumed mean for X
        self._B = 0  # assumed mean for Y
        self._maxX = float('-inf')
        self._minX = float('inf')
        self._sumX = 0
        self._sumY = 0
        self._sumXY = 0
        self._bYX = 0
        self._bXY = 0

    def _regressionLineYOnX(self):
        """Return ``(slope, intercept)`` of the regression line of y on x.

        The slope ``b_yx = r * sigma_y / sigma_x`` is also stored in
        ``_bYX``.  The line passes through the point of means, so the
        intercept is ``mean_y - b_yx * mean_x``.

        Raises:
            ZeroDivisionError: if ``_sigmaX`` or ``_N`` is zero.
        """
        self._bYX = (self._karlPCoefficient * self._sigmaY) / self._sigmaX
        # _sumX and _sumY are sums of the shifted values u and v, so the
        # assumed means have to be added back to obtain the true means.
        meanX = self._A + self._sumX / self._N
        meanY = self._B + self._sumY / self._N
        return self._bYX, meanY - self._bYX * meanX

    def plotScatterDiagram(self):
        """Show the scatter diagram together with the regression line of y on x.

        Must be called after ``calculateCovariance()`` and
        ``calculateKarlPearson()``.  Stores both regression coefficients in
        ``_bYX`` and ``_bXY``.

        Raises:
            ZeroDivisionError: if either standard deviation is zero.
        """
        plt.scatter(np.array(self._xValues), np.array(self._yValues))
        slope, intercept = self._regressionLineYOnX()
        # b_xy is not needed for this plot; it is only recorded on the instance.
        self._bXY = (self._karlPCoefficient * self._sigmaX) / self._sigmaY
        x = np.linspace(self._minX, self._maxX, 100)
        y = slope * x + intercept
        plt.title('Regression line of y on x')
        plt.plot(x, y, '-r', label='regression line of y on x')
        plt.xlabel('x', color='#1C2833')
        plt.ylabel('y', color='#1C2833')
        # Without a legend the label given to plot() is never displayed.
        plt.legend()
        plt.show()

    def parityString(self, string):
        """Return ``string`` right-padded with spaces to at least 9 characters."""
        return string.ljust(9)

    def _printRow(self, *cells):
        """Print ``cells`` as one table row of space-separated padded columns."""
        print(*(self.parityString(str(cell)) for cell in cells))

    def accept(self):
        """Read the sample size, the (x, y) pairs and the assumed means from stdin.

        Any data read by a previous call is discarded.

        Raises:
            ValueError: if an entry is not a number or the sample size is
                less than 1.
        """
        count = int(input("Enter the total number of values:"))
        if count < 1:
            # Every later statistic divides by N, so reject this up front.
            raise ValueError("the total number of values must be at least 1")
        self._N = count
        self._xValues = []
        self._yValues = []
        print("Enter the data for x and y:")
        for i in range(self._N):
            print("Enter the value at position ", (i + 1))
            self._xValues.append(float(input("Enter a x value: ")))
            self._yValues.append(float(input("Enter a y value: ")))
        print("Enter 0 is you don't want a change of scale:")
        self._A = float(input("Enter the assumed mean for x-data set:"))
        self._B = float(input("Enter the assumed mean for y-data set:"))

    def calculateCovariance(self):
        """Compute Cov(X, Y) from the first ``_N`` data pairs and print the table.

        Fills ``_uValues``, ``_vValues``, ``_uvValues``, the three sums,
        ``_minX``/``_maxX`` and ``_coVarianceXY``.  The covariance is
        ``(1/N) * (sum(uv) - (1/N) * sum(u) * sum(v))``.

        Raises:
            ZeroDivisionError: if ``_N`` is zero.
        """
        # Start from a clean slate so that a repeated call does not append to
        # the lists and add to the sums left over from the previous call.
        self._uValues = []
        self._vValues = []
        self._uvValues = []
        self._sumX = 0
        self._sumY = 0
        self._sumXY = 0
        self._maxX = float('-inf')
        self._minX = float('inf')

        for i in range(self._N):
            x = self._xValues[i]
            self._maxX = max(self._maxX, x)
            self._minX = min(self._minX, x)
            u = x - self._A
            v = self._yValues[i] - self._B
            self._uValues.append(u)
            self._vValues.append(v)
            self._uvValues.append(u * v)
            self._sumXY += u * v
            self._sumX += u
            self._sumY += v
        self._coVarianceXY = (1 / self._N) * (
            self._sumXY - (1 / self._N) * self._sumX * self._sumY)

        # print covariance table
        if self._A == 0 and self._B == 0:
            # No change of origin: u and v equal x and y, so show 3 columns.
            header = ("X", "Y", "XY")
            rows = zip(self._xValues, self._yValues, self._uvValues)
            totals = (self._sumX, self._sumY, self._sumXY)
            rule = "-------------------------------------"
        else:
            header = ("X", "u", "Y", "v", "uv")
            rows = zip(self._xValues, self._uValues, self._yValues,
                       self._vValues, self._uvValues)
            totals = (" ", self._sumX, " ", self._sumY, self._sumXY)
            rule = "-----------------------------------------------"
        self._printRow(*header)
        for row in rows:
            self._printRow(*row)
        print(rule)
        self._printRow(*totals)
        print(rule)
        print("Cov(X,Y)=", self._coVarianceXY)

    def calculateKarlPearson(self):
        """Compute and print Karl Pearson's r = Cov(X, Y) / (sigma_x * sigma_y).

        Must be called after ``calculateCovariance()``.  The standard
        deviations are taken over the shifted values, which equal those of
        the raw data because a change of origin does not affect spread.

        Raises:
            ZeroDivisionError: if either variate is constant (zero standard
                deviation) or ``_N`` is zero.
        """
        self._sigmaX = self.calculateSD(self._uValues)
        self._sigmaY = self.calculateSD(self._vValues)
        self._karlPCoefficient = self._coVarianceXY / (self._sigmaX *
                                                       self._sigmaY)
        print("r =", round(self._karlPCoefficient, 3))

    def calculateSD(self, _valuesList):
        """Return the population standard deviation of the first ``_N`` values.

        Raises:
            ZeroDivisionError: if ``_N`` is zero.
        """
        values = [_valuesList[i] for i in range(self._N)]
        mean = 0
        for value in values:
            mean += value
        mean /= self._N
        squaredDeviations = 0.0
        for value in values:
            squaredDeviations += (value - mean) ** 2
        return math.sqrt(squaredDeviations / self._N)

# Run the whole workflow in order: read the data, tabulate the covariance,
# report Karl Pearson's r, then show the scatter diagram with the fitted line.
obj = RegressionLine()
for _step in (obj.accept, obj.calculateCovariance,
              obj.calculateKarlPearson, obj.plotScatterDiagram):
    _step()