Wobei Cov(X,Y) die Kovarianz , Var(X) und Var(Y) die Varianz ist.
Das folgende T-SQL Skript berechnet die Korrelation zwischen zwei Spalten einer Tabelle. Zusätzlich wird der t-Wert bestimmt um Signifikanztests durch zu führen.
IF OBJECT_ID('tempdb..#aTable') IS NOT NULL
DROP TABLE #aTable
/* Creating an example table */
CREATE TABLE #aTable (
ROW int
,VALUE_1 int
,VALUE_2 int
)
INSERT INTO #aTable VALUES(1,12,12)
INSERT INTO #aTable VALUES(2,11,12)
INSERT INTO #aTable VALUES(3,11,11)
INSERT INTO #aTable VALUES(4,12,12)
INSERT INTO #aTable VALUES(5,13,13)
INSERT INTO #aTable VALUES(6,12,13)
INSERT INTO #aTable VALUES(7,13,13)
INSERT INTO #aTable VALUES(8,11,12)
INSERT INTO #aTable VALUES(9,6,7)
INSERT INTO #aTable VALUES(10,7,7)
/* Calculate descriptors of a distribution */
DECLARE @n int
SELECT @n = COUNT(*)FROM #aTable
DECLARE @VALUE_1_MIDDLE float
SELECT @VALUE_1_MIDDLE = (SELECT SUM(VALUE_1)/@n FROM #aTable)
DECLARE @VALUE_1_VARIANCE float
SELECT @VALUE_1_VARIANCE = (SELECT SQRT((SUM( POWER(VALUE_1 - @VALUE_1_MIDDLE, 2)))/@n) FROM #aTable)
DECLARE @VALUE_2_MIDDLE float
SELECT @VALUE_2_MIDDLE = (SELECT SUM(VALUE_2)/@n FROM #aTable)
DECLARE @VALUE_2_VARIANCE float
SELECT @VALUE_2_VARIANCE = (SELECT SQRT((SUM( POWER(VALUE_2 - @VALUE_2_MIDDLE, 2)))/@n) FROM #aTable)
/* calculating correlation between VALUE_1 and VALUE_2 */
DECLARE @r float
SELECT @r = (SELECT SUM((VALUE_1 - @VALUE_1_MIDDLE) * (VALUE_2 - @VALUE_2_MIDDLE)) / (@n * @VALUE_1_VARIANCE * @VALUE_2_VARIANCE)) FROM #aTable)
/* Calculating the student t-value (if @n >= 4) with df = (@n - 2)*/
DECLARE @t float
IF ((1 - POWER(@r,2)) > 0)
SELECT @t = (@r * SQRT(@n -2))/(SQRT(1 - POWER(@r,2)))
ELSE
SELECT @t = 0
SELECT @r as coorelation, @t as t_value
SELECT * FROM #aTable


Post a Comment