Algorithm Implementation/Statistics/Fleiss' kappa

From Wikibooks, open books for an open world
Jump to: navigation, search

Contents

[edit] Java

import java.util.Arrays;
 
/**
 * Computes Fleiss' kappa, a measure of the agreement between a fixed number
 * of raters assigning subjects to categories, as described in (Fleiss, 1971).
 */
public class FleissKappa
{
    /** When {@code true}, the intermediate results are printed on standard output. */
    public static final boolean DEBUG = true ;

    /**
     * Example on the data set of the Wikipedia article about Fleiss' kappa
     * (10 subjects, 14 raters, 5 categories).
     */
    public static void main(String[] args)
    {
        short[][] mat = new short[][]
        {
            {0,0,0,0,14},
            {0,2,6,4,2},
            {0,0,3,5,6},
            {0,3,9,2,0},
            {2,2,8,1,1},
            {7,7,0,0,0},
            {3,2,6,3,0},
            {2,5,3,2,2},
            {6,5,2,1,0},
            {0,2,2,3,7}
        } ;

        final float kappa = computeKappa(mat) ;
        // With DEBUG enabled computeKappa has already printed the value.
        if (!DEBUG)
        {
            System.out.println("kappa = " + kappa) ;
        }
    }

    /**
     * Computes the Kappa value.
     * <p>
     * The number of ratings per subject (n) is derived from the matrix itself:
     * it is the sum of each line, which must be the same for every line.
     *
     * @param mat Matrix[subjects][categories]: {@code mat[i][j]} is the number
     *            of raters who assigned subject {@code i} to category {@code j}
     * @return The Kappa value, or {@code NaN} when every rating falls into the
     *         same category (the expected agreement is then 1 and kappa is
     *         undefined)
     * @throws IllegalArgumentException If the matrix is null or empty, a line
     *         is null, lines differ in length or in number of ratings, a count
     *         is negative, or there are fewer than two ratings per subject
     */
    public static float computeKappa(short[][] mat)
    {
        if (mat == null || mat.length == 0)
        {
            throw new IllegalArgumentException("The matrix must contain at least one subject.") ;
        }

        final int N = mat.length ;                   // number of subjects
        final int k = checkEachLineLength(mat) ;     // number of categories
        final int n = checkEachLineCount(mat) ;      // PRE : every line count must be equal to n
        if (n < 2)
        {
            // P[i] divides by n * (n - 1), which is zero for n = 0 and n = 1
            throw new IllegalArgumentException("At least 2 ratings per subject are required, got "+n+".") ;
        }

        if(DEBUG) System.out.println(n+" raters.") ;
        if(DEBUG) System.out.println(N+" subjects.") ;
        if(DEBUG) System.out.println(k+" categories.") ;

        // Computing p[] : proportion of all the ratings given to each category.
        // The products are formed in double so that they cannot overflow an int.
        final double totalRatings = (double) N * n ;
        double[] p = new double[k] ;
        for(int j=0 ; j<k ; j++)
        {
            long columnSum = 0 ;
            for(int i=0 ; i<N ; i++)
            {
                columnSum += mat[i][j] ;
            }
            p[j] = columnSum / totalRatings ;
        }
        if(DEBUG) System.out.println("p = "+Arrays.toString(p)) ;

        // Computing P[] : extent of agreement between the raters on each subject
        final double raterPairs = (double) n * (n - 1) ;
        double[] P = new double[N] ;
        for(int i=0 ; i<N ; i++)
        {
            long squares = 0 ;
            for(int j=0 ; j<k ; j++)
            {
                squares += (long) mat[i][j] * mat[i][j] ;
            }
            P[i] = (squares - n) / raterPairs ;
        }
        if(DEBUG) System.out.println("P = "+Arrays.toString(P)) ;

        // Computing Pbar : mean of the P[i]
        double Pbar = 0 ;
        for(double Pi : P)
        {
            Pbar += Pi ;
        }
        Pbar /= N ;
        if(DEBUG) System.out.println("Pbar = "+Pbar) ;

        // Computing PbarE : agreement expected by chance
        double PbarE = 0 ;
        for(double pj : p)
        {
            PbarE += pj * pj ;
        }
        if(DEBUG) System.out.println("PbarE = "+PbarE) ;

        // 0/0 (all the ratings in a single category) yields NaN, as documented
        final float kappa = (float) ((Pbar - PbarE)/(1 - PbarE)) ;
        if(DEBUG) System.out.println("kappa = "+kappa) ;

        return kappa ;
    }

    /**
     * Assert that each line has the same number of categories
     * @param mat The matrix checked (not null, at least one line)
     * @return The number of categories
     * @throws IllegalArgumentException If a line is null or lines have different lengths
     */
    private static int checkEachLineLength(short[][] mat)
    {
        if (mat[0] == null)
        {
            throw new IllegalArgumentException("Line 0 is null.") ;
        }
        final int k = mat[0].length ;

        for(int i=1 ; i<mat.length ; i++)
        {
            if (mat[i] == null || mat[i].length != k)
            {
                throw new IllegalArgumentException("Line "+i+" does not contain "+k+" categories.") ;
            }
        }
        return k ;
    }

    /**
     * Assert that each line has a constant number of ratings
     * @param mat The matrix checked
     * @return The number of ratings
     * @throws IllegalArgumentException If lines contain different number of ratings
     *         or a negative count
     */
    private static int checkEachLineCount(short[][] mat)
    {
        int n = 0 ;
        boolean firstLine = true ;

        for(short[] line : mat)
        {
            int count = 0 ;
            for(short cell : line)
            {
                if (cell < 0)
                {
                    throw new IllegalArgumentException("Negative rating count: "+cell+".") ;
                }
                count += cell ;
            }
            if(firstLine)
            {
                n = count ;
                firstLine = false ;
            }
            if(n != count)
            {
                throw new IllegalArgumentException("Line count != "+n+" (n value).") ;
            }
        }
        return n ;
    }
}

[edit] Python

""" Computes the Fleiss' Kappa value as described in (Fleiss, 1971) """
 
# When true, computeKappa prints its intermediate results on standard output.
DEBUG = True

def computeKappa(mat):
    """ Computes the Kappa value
        The number of ratings per subject (n) is not a parameter: it is the
        sum of each line of mat, as returned by checkEachLineCount.
        @param mat Matrix[subjects][categories]; mat[i][j] is the number of
                   raters who assigned subject i to category j
        @return The Kappa value
        @throws ZeroDivisionError If n is 1 or if all the ratings fall into a
                                  single category (a denominator is then 0) """
    n = checkEachLineCount(mat)   # PRE : every line count must be equal to n
    N = len(mat)
    k = len(mat[0])

    # Each print receives one pre-formatted string, so the output is the same
    # whether print is the Python 2 statement or the Python 3 function.
    if DEBUG:
        print("%s raters." % n)
        print("%s subjects." % N)
        print("%s categories." % k)

    # Computing p[] : proportion of all the ratings given to each category
    # (the 0.0 start value forces floating point accumulation and division)
    p = [sum((mat[i][j] for i in range(N)), 0.0) / (N * n) for j in range(k)]
    if DEBUG: print("p = %s" % (p,))

    # Computing P[] : extent of agreement between the raters on each subject
    P = [(sum((mat[i][j] * mat[i][j] for j in range(k)), 0.0) - n) / (n * (n - 1))
         for i in range(N)]
    if DEBUG: print("P = %s" % (P,))

    # Computing Pbar : mean of the P[i]
    Pbar = sum(P) / N
    if DEBUG: print("Pbar = %s" % Pbar)

    # Computing PbarE : agreement expected by chance
    PbarE = sum((pj * pj for pj in p), 0.0)
    if DEBUG: print("PbarE = %s" % PbarE)

    kappa = (Pbar - PbarE) / (1 - PbarE)
    if DEBUG: print("kappa = %s" % kappa)

    return kappa
 
def checkEachLineCount(mat):
    """ Checks that every line of the matrix holds the same number of ratings
        @param mat The matrix checked; the first line gives the reference count
        @return The number of ratings found on each line
        @throws AssertionError If a later line sums to a different count """
    expected = sum(mat[0])
    remaining = mat[1:]

    # Stops at the first line whose total differs from the reference
    mismatch = any(sum(row) != expected for row in remaining)
    assert not mismatch, "Line count != %d (n value)." % expected
    return expected
 
if __name__ == "__main__":
    # Example on this Wikipedia article data set:
    # one line per subject, one column per category.
    mat = [
        [0, 0, 0, 0, 14],
        [0, 2, 6, 4, 2],
        [0, 0, 3, 5, 6],
        [0, 3, 9, 2, 0],
        [2, 2, 8, 1, 1],
        [7, 7, 0, 0, 0],
        [3, 2, 6, 3, 0],
        [2, 5, 3, 2, 2],
        [6, 5, 2, 1, 0],
        [0, 2, 2, 3, 7],
    ]

    kappa = computeKappa(mat)

[edit] Ruby

#
# Computes the Fleiss' Kappa value as described in (Fleiss, 1971) 
#
# Adds up the elements of arr, starting from the integer 0
# (so an empty collection gives 0)
def sum(arr)
  arr.inject(0) { |total, x| total + x }
end
 
# Checks that every line of the matrix holds the same number of ratings.
# The first line gives the reference count; an exception is raised as soon
# as a line sums to a different value. Returns the number of ratings.
def checkEachLineCount(matrix)
  expected = sum(matrix[0])
  matrix.each do |row|
    next if sum(row) == expected
    raise "Line count != #{expected} (n value)."
  end
  expected
end
 
# Computes the Kappa value of a rating matrix
# param matrix [subjects][categories]: matrix[i][j] is the number of raters
#       who assigned subject i to category j
# Returns the Kappa value; the intermediate results are printed while the
# local debug flag is true
def computeKappa(matrix)
  debug = true

  # Number of ratings per subject (number of human raters)
  raters = checkEachLineCount(matrix)   # PRE : every line count must be equal
  subjects = matrix.size
  categories = matrix[0].size

  if debug
    puts "#{raters} raters."
    puts "#{subjects} subjects."
    puts "#{categories} categories."
  end

  # Proportion of all the ratings given to each category
  category_share = (0...categories).map do |j|
    column_total = (0...subjects).inject(0.0) { |acc, i| acc + matrix[i][j] }
    column_total / (subjects * raters)
  end

  puts "p = #{category_share.join(',')}" if debug

  # Extent of agreement between the raters on each subject
  agreement = (0...subjects).map do |i|
    squares = (0...categories).inject(0.0) { |acc, j| acc + matrix[i][j] * matrix[i][j] }
    (squares - raters) / (raters * (raters - 1))
  end

  puts "f_P = #{agreement.join(',')}" if debug

  # Mean of the per-subject agreements
  mean_agreement = sum(agreement) / subjects
  puts "f_Pbar = #{mean_agreement}" if debug

  # Agreement expected by chance
  chance_agreement = category_share.inject(0.0) { |acc, share| acc + share * share }

  puts "f_PbarE = #{chance_agreement}" if debug

  kappa = (mean_agreement - chance_agreement) / (1 - chance_agreement)
  puts "kappa = #{kappa}" if debug

  kappa
end
 
# Example on this Wikipedia article data set:
# one line per subject, one column per category

matrix = [
  [0, 0, 0, 0, 14],
  [0, 2, 6, 4, 2],
  [0, 0, 3, 5, 6],
  [0, 3, 9, 2, 0],
  [2, 2, 8, 1, 1],
  [7, 7, 0, 0, 0],
  [3, 2, 6, 3, 0],
  [2, 5, 3, 2, 2],
  [6, 5, 2, 1, 0],
  [0, 2, 2, 3, 7],
]

kappa = computeKappa(matrix)

[edit] Perl

#!/usr/local/bin/perl
 
# Readable names for the boolean values 0 and 1.
use constant {
        false => 0,
        true  => 1,
};

# Example data set: one row per subject, one column per category.
# The cells are kept as strings; they are used as numbers in the computations.
@mat = (
        [qw(0 0 0 0 14)],
        [qw(0 2 6 4 2)],
        [qw(0 0 3 5 6)],
        [qw(0 3 9 2 0)],
        [qw(2 2 8 1 1)],
        [qw(7 7 0 0 0)],
        [qw(3 2 6 3 0)],
        [qw(2 5 3 2 2)],
        [qw(6 5 2 1 0)],
        [qw(0 2 2 3 7)],
);
$kappa = &computeKappa(@mat);
 
#####
# Computes the Kappa value
# The number of ratings per subject (n) is not a parameter: it is the value
# returned by checkEachLineCount for the matrix.
# @param mat Matrix[subjects][categories], passed as a list of row references
# @return The Kappa value
# Dies if the matrix is empty or if there are fewer than 2 ratings per subject
#####

# No ($) prototype: the matrix arrives as a list of row references in @_,
# and a one-scalar prototype would flatten that list to its element count.
sub computeKappa {
        my @mat = @_;   # work on the argument, not on a global of the same name
        die "The matrix must contain at least one subject.\n" unless @mat;

        my $n = &checkEachLineCount(@mat);  # PRE : every line count must be equal to n
        my $N = scalar @mat;
        my $k = scalar @{ $mat[0] };

        # P[i] divides by n * (n - 1), which is zero for n = 0 and n = 1
        die "At least 2 ratings per subject are required, got ".$n.".\n" if $n < 2;

        print $n." raters\n";
        print $N." subjects\n";
        print $k." categories\n";

        # Computing p[] : proportion of all the ratings given to each category
        my @p = ();
        for my $j (0 .. $k-1){
                $p[$j] = 0;
                for my $i (0 .. $N-1){
                        $p[$j] += $mat[$i][$j];
                }
                $p[$j] /= $N*$n;
        }
        print "p = [".join(", ", @p)."]\n";

        # Computing P[] : extent of agreement between the raters on each subject
        # (lexical, so that no entry survives from a previous call)
        my @P = ();
        for my $i (0 .. $N-1){
                $P[$i] = 0;
                for my $j (0 .. $k-1){
                        $P[$i] += $mat[$i][$j]*$mat[$i][$j];
                }
                $P[$i] = ($P[$i] - $n) / ($n * ($n - 1));
        }
        print "P = [".join(", ", @P)."]\n";

        # Computing Pbar : mean of the P[i]
        my $Pbar = 0;
        $Pbar += $_ for @P;
        $Pbar /= $N;
        print "Pbar = ".$Pbar."\n";

        # Computing PbarE : agreement expected by chance
        my $PbarE = 0;
        $PbarE += $_ * $_ for @p;
        print "PbarE = ".$PbarE."\n";

        my $kappa = ($Pbar - $PbarE)/(1 - $PbarE);
        print "kappa = ".$kappa."\n";
        return $kappa;
}
 
#####
# Assert that each line has a constant number of ratings
# @param mat The matrix checked, passed as a list of row references
# @return The number of ratings (0 for an empty matrix)
# Dies if lines contain different number of ratings
#####

# No ($) prototype: the matrix arrives as a list of row references in @_.
sub checkEachLineCount {
        my @rows = @_;   # check the argument, not a global of the same name
        my $n = 0;
        my $firstLine = 1;
        for my $row (@rows){
                my $count = 0;
                $count += $_ for @{ $row };
                if($firstLine){
                        $n = $count;
                        $firstLine = 0;
                }
                if($n != $count){
                        die "Line count != ".$n." (n value).\n";
                }
        }
        # Returning only after the loop lets every line be compared with the first
        return $n;
}


[edit] PHP

/**
 * Computes Fleiss' kappa for a table of classification counts.
 *
 * Adapted from the example in en.wikipedia.org/wiki/Fleiss'_kappa
 *
 * @param array $table One row per subject and one column per class, both
 *                     indexed from 0; $table[$j][$i] is the number of raters
 *                     who assigned subject $j to class $i. Every row must have
 *                     the same number of classes and the same total of ratings.
 * @return float The kappa value
 * @throws InvalidArgumentException If the table is empty, if rows differ in
 *                     number of classes or of ratings, or if there are fewer
 *                     than two ratings per subject
 */
function fleissKappa($table){

    if(!is_array($table) || count($table) == 0 || !isset($table[0]) || !is_array($table[0])){
        throw new InvalidArgumentException("the table must contain at least one subject.");
    }

    $subjects = count($table);
    $classes = count($table[0]);
    $raters = array_sum($table[0]);

    // Report invalid input to the caller instead of ending the whole script.
    for($q = 1; $q < $subjects; $q++){

        if(!isset($table[$q]) || !is_array($table[$q]) || count($table[$q]) != $classes){
            throw new InvalidArgumentException("no equal number of classes.");
        }

        if(array_sum($table[$q]) != $raters){
            throw new InvalidArgumentException("no equal number of raters.");
        }

    }

    // The per-subject agreement divides by raters * (raters - 1).
    if($raters < 2){
        throw new InvalidArgumentException("at least two raters are required.");
    }

    $pj = array();                          // share of all the ratings per class
    $pi = array_fill(0, $subjects, 0);      // sum of squared counts per subject

    for($i = 0; $i < $classes; $i++){

        $tpj = 0;

        for($j = 0; $j < $subjects; $j++){

            $count = $table[$j][$i];
            $tpj += $count;
            $pi[$j] += $count * $count;

        }

        $pj[$i] = $tpj / ($raters * $subjects);

    }

    // Turn the sums of squares into the extent of agreement on each subject.
    for($j = 0; $j < $subjects; $j++){
        $pi[$j] = ($pi[$j] - $raters) / ($raters * ($raters - 1));
    }

    // Mean observed agreement and agreement expected by chance.
    $pcarret = array_sum($pi) / $subjects;
    $pecarret = 0;

    foreach($pj as $share){
        $pecarret += $share * $share;
    }

    $kappa = ($pcarret - $pecarret) / (1 - $pecarret);

    return $kappa;

}
Personal tools
Namespaces
Variants
Actions
Navigation
Community
Toolbox
Sister projects
Print/export