Python, C, C++ and Java programs for CBSE, ISC, B.Tech and I.T. Computer Science and MCA students

The Programming Project: Karl Pearson Coefficient of Correlation and Scatter diagram Regression Line

Saturday, January 21, 2023

Karl Pearson Coefficient of Correlation and Scatter diagram Regression Line

A simple program that calculates the covariance between two variates (X and Y) and, from it, the Karl Pearson coefficient of correlation. It then plots the scatter diagram of the data together with the regression line of Y on X.




import matplotlib.pyplot as plt
import numpy as np
import math


class RegressionLine:
    """Karl Pearson correlation and regression line of y on x.

    Intended call order: ``accept()`` -> ``calculateCovariance()`` ->
    ``calculateKarlPearson()`` -> ``plotScatterDiagram()``.

    The data may be shifted by assumed means ``_A`` (for x) and ``_B``
    (for y).  The deviations ``u = x - A`` and ``v = y - B`` are what the
    sums ``_sumX``, ``_sumY`` and ``_sumXY`` accumulate, so those sums equal
    the raw sums of x, y and xy only when both assumed means are zero.
    """

    # Width every table cell is padded to by parityString().
    _COLUMN_WIDTH = 9

    def __init__(self) -> None:
        self._xValues = []  # raw x observations
        self._uValues = []  # deviations x - A
        self._yValues = []  # raw y observations
        self._vValues = []  # deviations y - B
        self._uvValues = []  # products u * v
        self._coVarianceXY = 0.0
        self._karlPCoefficient = 0.0
        self._sigmaX = 0.0  # standard deviation for X
        self._sigmaY = 0.0  # standard deviation for Y
        self._N = 0  # number of (x, y) pairs
        self._A = 0  # assumed mean for X
        self._B = 0  # assumed mean for Y
        self._maxX = float('-inf')
        self._minX = float('inf')
        self._sumX = 0  # sum of u
        self._sumY = 0  # sum of v
        self._sumXY = 0  # sum of u * v
        self._bYX = 0  # regression coefficient of y on x
        self._bXY = 0  # regression coefficient of x on y

    def parityString(self, string):
        """Return *string* right-padded with spaces to at least 9 characters.

        Strings that are already 9 characters or longer are returned
        unchanged.
        """
        return string.ljust(self._COLUMN_WIDTH)

    def accept(self):
        """Read N, the N (x, y) pairs and both assumed means from stdin."""
        self._N = int(input("Enter the total number of values:"))
        print("Enter the data for x and y:")
        for i in range(self._N):
            print("Enter the value at position ", (i + 1))
            self._xValues.append(float(input("Enter a x value: ")))
            self._yValues.append(float(input("Enter a y value: ")))
        print("Enter 0 is you don't want a change of scale:")
        self._A = float(input("Enter the assumed mean for x-data set:"))
        self._B = float(input("Enter the assumed mean for y-data set:"))

    def calculateCovariance(self):
        """Compute Cov(X, Y), the x-range, and print the working table.

        Covariance is computed from the deviations u and v as
        ``(sum(uv) - sum(u) * sum(v) / N) / N``.  All derived state is
        rebuilt from scratch on every call, so calling this method again
        gives the same result instead of accumulating into stale sums.

        Raises:
            ZeroDivisionError: if ``_N`` is less than 1 (no data).
            IndexError: if fewer than ``_N`` x or y values are stored.
        """
        if self._N < 1:
            # ZeroDivisionError is what the bare 1 / N below would raise for
            # N == 0; the type is kept and a message is added.
            raise ZeroDivisionError(
                "cannot compute a covariance without at least one data pair")

        uValues, vValues, uvValues = [], [], []
        sumU = sumV = sumUV = 0
        minX, maxX = float('inf'), float('-inf')
        for i in range(self._N):
            x = self._xValues[i]
            y = self._yValues[i]
            maxX = max(maxX, x)
            minX = min(minX, x)
            u = x - self._A
            v = y - self._B
            uValues.append(u)
            vValues.append(v)
            uvValues.append(u * v)
            sumUV += u * v
            sumU += u
            sumV += v

        self._uValues, self._vValues, self._uvValues = uValues, vValues, uvValues
        self._sumX, self._sumY, self._sumXY = sumU, sumV, sumUV
        self._minX, self._maxX = minX, maxX
        self._coVarianceXY = (1 / self._N) * (
            self._sumXY - (1 / self._N) * self._sumX * self._sumY)

        self._printCovarianceTable()

    def _printCovarianceTable(self):
        """Print the covariance working table built by calculateCovariance."""
        if self._A == 0 and self._B == 0:
            # No change of origin: u == x and v == y, so three columns do.
            self._printRow("X", "Y", "XY")
            for i in range(self._N):
                self._printRow(self._xValues[i], self._yValues[i],
                               self._uvValues[i])
            print("-------------------------------------")
            self._printRow(self._sumX, self._sumY, self._sumXY)
            print("-------------------------------------")
        else:
            self._printRow("X", "u", "Y", "v", "uv")
            for i in range(self._N):
                self._printRow(self._xValues[i], self._uValues[i],
                               self._yValues[i], self._vValues[i],
                               self._uvValues[i])
            print("-----------------------------------------------")
            # Totals exist only for the u, v and uv columns.
            self._printRow(" ", self._sumX, " ", self._sumY, self._sumXY)
            print("-----------------------------------------------")
        print("Cov(X,Y)=", self._coVarianceXY)

    def _printRow(self, *cells):
        """Print one table row, each cell converted to str and padded."""
        print(*(self.parityString(str(cell)) for cell in cells))

    def calculateKarlPearson(self):
        """Compute both standard deviations and r = Cov(X, Y) / (sx * sy).

        The deviations u and v are used for the standard deviations; a
        shift of origin does not change a standard deviation.  Prints r
        rounded to three decimals.

        Raises:
            ZeroDivisionError: if either standard deviation is zero, in
                which case the coefficient is undefined.
        """
        self._sigmaX = self.calculateSD(self._uValues)
        self._sigmaY = self.calculateSD(self._vValues)
        denominator = self._sigmaX * self._sigmaY
        if denominator == 0:
            # Same exception type the bare division would raise, with a
            # message that says why.
            raise ZeroDivisionError(
                "correlation coefficient is undefined: "
                "x or y has zero standard deviation")
        self._karlPCoefficient = self._coVarianceXY / denominator
        print("r =", round(self._karlPCoefficient, 3))

    def calculateSD(self, _valuesList):
        """Return the population standard deviation of the first N values.

        N is ``self._N``; the divisor is N, not N - 1.

        Raises:
            ZeroDivisionError: if ``_N`` is zero.
            IndexError: if *_valuesList* holds fewer than ``_N`` items.
        """
        values = [_valuesList[i] for i in range(self._N)]
        # fsum avoids accumulating float rounding error over the data.
        mean = math.fsum(values) / self._N
        variance = math.fsum((value - mean) ** 2 for value in values) / self._N
        return math.sqrt(variance)

    def _regressionOfYOnX(self):
        """Store both regression coefficients; return (slope, intercept).

        The line of y on x passes through (mean x, mean y).  ``_sumX`` and
        ``_sumY`` are sums of deviations from the assumed means, so the
        assumed means are added back to recover the true means; without
        that the intercept is wrong whenever A or B is non-zero.

        Raises:
            ZeroDivisionError: if either standard deviation or ``_N`` is 0.
        """
        self._bYX = (self._karlPCoefficient * self._sigmaY) / self._sigmaX
        self._bXY = (self._karlPCoefficient * self._sigmaX) / self._sigmaY
        meanX = self._A + self._sumX / self._N
        meanY = self._B + self._sumY / self._N
        return self._bYX, meanY - self._bYX * meanX

    def plotScatterDiagram(self):
        """Show the scatter diagram with the regression line of y on x.

        The line is drawn over [min x, max x] as found by
        calculateCovariance(), using the coefficient from
        calculateKarlPearson(); both must have been called first.
        """
        slope, intercept = self._regressionOfYOnX()
        plt.scatter(np.array(self._xValues), np.array(self._yValues))
        lineX = np.linspace(self._minX, self._maxX, 100)
        lineY = slope * lineX + intercept
        plt.title('Regression line of y on x')
        plt.plot(lineX, lineY, '-r', label='regression line of y on x')
        plt.xlabel('x', color='#1C2833')
        plt.ylabel('y', color='#1C2833')
        plt.show()

# Run the whole workflow interactively: read the data, print the covariance
# table, print the correlation coefficient r, then show the plot.  Each step
# relies on state stored by the previous one, so the order matters.
obj = RegressionLine()
for step in (obj.accept, obj.calculateCovariance,
             obj.calculateKarlPearson, obj.plotScatterDiagram):
    step()

No comments:

Post a Comment