Skip to content

Produkt-Moment-Korrelation nach Pearson in Transact SQL

Wobei Cov(X,Y) die Kovarianz , Var(X) und Var(Y) die Varianz ist.

Das folgende T-SQL Skript berechnet die Korrelation zwischen zwei Spalten einer Tabelle. Zusätzlich wird der t-Wert bestimmt um Signifikanztests durch zu führen.

IF OBJECT_ID('tempdb..#aTable') IS NOT NULL
 DROP TABLE #aTable
/* Creating an example table */
CREATE TABLE #aTable (
 ROW int
 ,VALUE_1 int
 ,VALUE_2 int
)

INSERT INTO #aTable VALUES(1,12,12)
INSERT INTO #aTable VALUES(2,11,12)
INSERT INTO #aTable VALUES(3,11,11)
INSERT INTO #aTable VALUES(4,12,12)
INSERT INTO #aTable VALUES(5,13,13)
INSERT INTO #aTable VALUES(6,12,13)
INSERT INTO #aTable VALUES(7,13,13)
INSERT INTO #aTable VALUES(8,11,12)
INSERT INTO #aTable VALUES(9,6,7)
INSERT INTO #aTable VALUES(10,7,7)

/* Calculate descriptors of a distribution */
DECLARE @n int
SELECT @n = COUNT(*)FROM #aTable

DECLARE @VALUE_1_MIDDLE float
SELECT @VALUE_1_MIDDLE = (SELECT SUM(VALUE_1)/@n FROM #aTable)

DECLARE @VALUE_1_VARIANCE float
SELECT @VALUE_1_VARIANCE = (SELECT SQRT((SUM( POWER(VALUE_1 - @VALUE_1_MIDDLE, 2)))/@n) FROM #aTable)

DECLARE @VALUE_2_MIDDLE float
SELECT @VALUE_2_MIDDLE = (SELECT SUM(VALUE_2)/@n FROM #aTable)

DECLARE @VALUE_2_VARIANCE float
SELECT @VALUE_2_VARIANCE = (SELECT SQRT((SUM( POWER(VALUE_2 - @VALUE_2_MIDDLE, 2)))/@n) FROM #aTable)

/* calculating correlation between VALUE_1 and VALUE_2 */
DECLARE @r float
SELECT @r = (SELECT SUM((VALUE_1 - @VALUE_1_MIDDLE) * (VALUE_2 - @VALUE_2_MIDDLE)) / (@n * @VALUE_1_VARIANCE * @VALUE_2_VARIANCE)) FROM #aTable)

/* Calculating the student t-value (if @n >= 4) with df = (@n - 2)*/
DECLARE @t float
IF ((1 - POWER(@r,2)) > 0)
 SELECT @t = (@r * SQRT(@n  -2))/(SQRT(1 - POWER(@r,2)))
ELSE
 SELECT @t = 0

SELECT @r as coorelation, @t as t_value
SELECT * FROM #aTable

Post a Comment

Your email is never published nor shared. Required fields are marked *
*
*

Optimized by SEO Ultimate