Jump to content

Algorithm Implementation/Statistics/Fleiss' kappa

From Wikibooks, open books for an open world
import java.util.Arrays;

/**
 * Computes Fleiss' kappa as described in (Fleiss, 1971).
 */
public class FleissKappa
{
    /** When true, {@link #computeKappa(short[][])} prints its intermediate values to standard output. */
    public static final boolean DEBUG = true ;
    
    /**
     * Example on this Wikipedia article data set (10 subjects, 5 categories, 14 ratings per subject).
     */
    public static void main(String[] args)
    {
        short[][] mat = new short[][]
        {
            {0,0,0,0,14},
            {0,2,6,4,2},
            {0,0,3,5,6},
            {0,3,9,2,0},
            {2,2,8,1,1},
            {7,7,0,0,0},
            {3,2,6,3,0},
            {2,5,3,2,2},
            {6,5,2,1,0},
            {0,2,2,3,7}
        } ;
        
        float kappa = computeKappa(mat) ;
        // With DEBUG enabled the value has already been printed by computeKappa.
        if(!DEBUG) System.out.println("kappa = "+kappa) ;
    }
    
    /**
     * Computes the Kappa value.
     * The number of ratings per subject (n) is not a parameter: it is derived
     * from the matrix, whose lines must all sum to the same value.
     * @param mat Matrix[subjects][categories] of rating counts
     * @return The Kappa value; NaN when every rating falls into one single
     *         category (expected agreement is 1, so kappa is 0/0)
     * @throws IllegalArgumentException If the matrix is empty, is not rectangular,
     *         contains a negative count, has lines with different numbers of
     *         ratings, or has fewer than 2 ratings per subject
     */
    public static float computeKappa(short[][] mat)
    {
        if(mat == null || mat.length == 0 || mat[0] == null || mat[0].length == 0)
            throw new IllegalArgumentException("The matrix must contain at least one subject and one category.") ;
        
        final int n = checkEachLineCount(mat) ;  // PRE : every line count must be equal to n
        final int N = mat.length ;
        final int k = mat[0].length ;
        
        // P[i] divides by n*(n-1), so agreement is undefined for fewer than 2 ratings.
        if(n < 2)
            throw new IllegalArgumentException("At least 2 ratings per subject are required (n = "+n+").") ;
        
        if(DEBUG) System.out.println(n+" raters.") ;
        if(DEBUG) System.out.println(N+" subjects.") ;
        if(DEBUG) System.out.println(k+" categories.") ;
        
        // Computing p[] : share of all ratings that went to category j.
        // Intermediate values are kept in double; only the result is narrowed to float.
        final double total = (double) N * n ;
        double[] p = new double[k] ;
        for(int j=0 ; j<k ; j++)
        {
            long columnSum = 0 ;
            for(int i=0 ; i<N ; i++)
                columnSum += mat[i][j] ;
            p[j] = columnSum / total ;
        }
        if(DEBUG) System.out.println("p = "+Arrays.toString(p)) ;
        
        // Computing P[] : extent of agreement among the raters for subject i.
        final double pairs = (double) n * (n - 1) ;
        double[] P = new double[N] ;
        for(int i=0 ; i<N ; i++)
        {
            long squares = 0 ;
            for(int j=0 ; j<k ; j++)
                squares += mat[i][j] * mat[i][j] ;
            P[i] = (squares - n) / pairs ;
        }
        if(DEBUG) System.out.println("P = "+Arrays.toString(P)) ;
        
        // Computing Pbar : mean of the P[i]
        double Pbar = 0 ;
        for(double Pi : P)
            Pbar += Pi ;
        Pbar /= N ;
        if(DEBUG) System.out.println("Pbar = "+Pbar) ;
        
        // Computing PbarE : agreement expected by chance
        double PbarE = 0 ;
        for(double pj : p)
            PbarE += pj * pj ;
        if(DEBUG) System.out.println("PbarE = "+PbarE) ;
        
        final float kappa = (float) ((Pbar - PbarE)/(1 - PbarE)) ;
        if(DEBUG) System.out.println("kappa = "+kappa) ;
        
        return kappa ;
    }
    
    /**
     * Assert that the matrix is rectangular, holds no negative count and that
     * each line has a constant number of ratings.
     * @param mat The matrix checked (must contain at least one line)
     * @return The number of ratings
     * @throws IllegalArgumentException If lines differ in length, contain a
     *         negative count, or contain different number of ratings
     */
    private static int checkEachLineCount(short[][] mat)
    {
        final int k = mat[0].length ;
        int n = -1 ;  // -1 = not set yet; real counts are never negative
        
        for(short[] line : mat)
        {
            if(line == null || line.length != k)
                throw new IllegalArgumentException("Every line must contain "+k+" categories.") ;
            
            int count = 0 ;
            for(short cell : line)
            {
                if(cell < 0)
                    throw new IllegalArgumentException("Negative rating count: "+cell+".") ;
                count += cell ;
            }
            
            if(n < 0)
                n = count ;
            else if(n != count)
                throw new IllegalArgumentException("Line count != "+n+" (n value).") ;
        }
        return n ;
    }
}

Python

[edit | edit source]
""" Computes the Fleiss' Kappa value as described in (Fleiss, 1971) """

# When true, computeKappa prints its intermediate values (n, N, k, p, P, Pbar, PbarE, kappa).
DEBUG = True

def computeKappa(mat):
    """ Computes the Kappa value
        The number of ratings per subject (n) is not a parameter: it is
        derived from the matrix by checkEachLineCount.
        Runs unchanged on Python 2 and Python 3.
        @param mat Matrix[subjects][categories]
        @return The Kappa value """
    n = checkEachLineCount(mat)   # PRE : every line count must be equal to n
    N = len(mat)
    k = len(mat[0])

    if DEBUG:
        print("%s raters." % n)
        print("%s subjects." % N)
        print("%s categories." % k)

    # Computing p[] : share of all ratings that went to category j
    p = [sum(row[j] for row in mat) / float(N * n) for j in range(k)]
    if DEBUG: print("p = %s" % (p,))

    # Computing P[] : extent of agreement among the raters for subject i
    P = [(sum(row[j] * row[j] for j in range(k)) - n) / float(n * (n - 1))
         for row in mat]
    if DEBUG: print("P = %s" % (P,))

    # Computing Pbar : mean of the P[i]
    Pbar = sum(P) / N
    if DEBUG: print("Pbar = %s" % Pbar)

    # Computing PbarE : agreement expected by chance
    PbarE = sum(pj * pj for pj in p)
    if DEBUG: print("PbarE = %s" % PbarE)

    kappa = (Pbar - PbarE) / (1 - PbarE)
    if DEBUG: print("kappa = %s" % kappa)

    return kappa

def checkEachLineCount(mat):
    """ Assert that each line has a constant number of ratings
        The check is an explicit raise rather than an assert statement, so it
        is still performed when Python runs with -O.
        @param mat The matrix checked
        @return The number of ratings
        @throws AssertionError If lines contain different number of ratings """
    n = sum(mat[0])

    for line in mat[1:]:
        if sum(line) != n:
            raise AssertionError("Line count != %d (n value)." % n)
    return n

if __name__ == "__main__":
    # Example on this Wikipedia article data set:
    # one line per subject, one column per category.
    mat = [
        [0, 0, 0, 0, 14],
        [0, 2, 6, 4, 2],
        [0, 0, 3, 5, 6],
        [0, 3, 9, 2, 0],
        [2, 2, 8, 1, 1],
        [7, 7, 0, 0, 0],
        [3, 2, 6, 3, 0],
        [2, 5, 3, 2, 2],
        [6, 5, 2, 1, 0],
        [0, 2, 2, 3, 7],
    ]

    kappa = computeKappa(mat)
#
# Computes the Fleiss' Kappa value as described in (Fleiss, 1971) 
#
# Returns the sum of the numbers in arr.
# The fold is seeded with 0 so that an empty array yields 0 instead of nil.
def sum(arr)
  arr.inject(0, :+)
end

# Checks that every line of the matrix adds up to the same number of ratings.
# The total of the first line is the reference; a line whose total differs
# raises a RuntimeError. Returns that common total (the number of ratings).
def checkEachLineCount(matrix)
  n = sum(matrix[0])
  matrix.each do |line|
    next if sum(line) == n
    raise "Line count != #{n} (n value)."
  end
  n
end

# Computes the Kappa value and returns it.
# param matrix [subjects][categories]
# The intermediate values are printed with puts while the local debug flag is true.
def computeKappa(matrix)
    debug = true

    # n is the number of ratings per subject (number of human raters);
    # checkEachLineCount raises unless every line sums to that value
    n = checkEachLineCount(matrix)
    i_N = matrix.size
    k = matrix[0].size

    if debug
      puts "#{n} raters."
      puts "#{i_N} subjects."
      puts "#{k} categories."
    end

    # p[j]: share of all ratings that went to category j
    shares = (0...k).map do |j|
      matrix.inject(0.0) { |acc, row| acc + row[j] } / (i_N*n)
    end

    puts "p = #{shares.join(',')}" if debug

    # f_P[i]: extent of agreement among the raters for subject i
    f_P = matrix.map do |row|
      squares = (0...k).inject(0.0) { |acc, j| acc + row[j] * row[j] }
      (squares - n) / (n * (n - 1))
    end

    puts "f_P = #{f_P.join(',')}" if debug

    # f_Pbar: mean of the f_P[i]
    f_Pbar = sum(f_P) / i_N
    puts "f_Pbar = #{f_Pbar}" if debug

    # f_PbarE: agreement expected by chance
    f_PbarE = shares.inject(0.0) { |acc, share| acc + share**2 }

    puts "f_PbarE = #{f_PbarE}" if debug

    kappa = (f_Pbar - f_PbarE) / (1 - f_PbarE)
    puts "kappa = #{kappa}" if debug

    kappa
end

# Example on this Wikipedia article data set:
# one line per subject, one column per category.

matrix = [
  [0, 0, 0, 0, 14],
  [0, 2, 6, 4, 2],
  [0, 0, 3, 5, 6],
  [0, 3, 9, 2, 0],
  [2, 2, 8, 1, 1],
  [7, 7, 0, 0, 0],
  [3, 2, 6, 3, 0],
  [2, 5, 3, 2, 2],
  [6, 5, 2, 1, 0],
  [0, 2, 2, 3, 7]
]

kappa = computeKappa(matrix)
#!/usr/local/bin/perl

# Boolean constants used by checkEachLineCount for its first-line flag.
use constant false => 0;
use constant true  => 1;

# Example data set: one array reference per subject, one element per category.
# The counts are written as quoted strings and are used as numbers by the subs below.
@mat = (['0','0','0','0','14'],
	['0','2','6','4','2'],
	['0','0','3','5','6'],
	['0','3','9','2','0'],
	['2','2','8','1','1'],
	['7','7','0','0','0'],
	['3','2','6','3','0'],
	['2','5','3','2','2'],
	['6','5','2','1','0'],
	['0','2','2','3','7']);
# The sub is defined further down; it works on the package variable @mat.
$kappa = &computeKappa(@mat);

#####
# Computes the Kappa value
# The ratings are read from the package variable @mat
# (Matrix[subjects][categories]); arguments passed in the call are not used.
# The number of ratings per subject (n) is derived by checkEachLineCount.
# Prints n, N, k, p, P, Pbar, PbarE and kappa to STDOUT.
# @return The Kappa value
#####

sub computeKappa($){
	$n = &checkEachLineCount(@mat);  # PRE : every line count must be equal to n
	$N = $#mat+1;
	$k = $#{ $mat[0] }+1;

	print $n." raters\n";
	print $N." subjects\n";
	print $k." categories\n";

	# p[j] : share of all ratings that went to category j
	@p = ();
	for my $j (0 .. $k-1){
		$p[$j] = 0;
		for my $i (0 .. $N-1){
			$p[$j] += $mat[$i][$j];
		}
		$p[$j] /= $N*$n;
	}
	# join prints every element, including the last one, without a trailing separator
	print "p = [".join(", ", @p)."]\n";

	# P[i] : extent of agreement among the raters for subject i
	@P = ();  # reset, so entries from an earlier call on a larger matrix cannot leak into Pbar
	for my $i (0 .. $N-1){
		$P[$i] = 0;
		for my $j (0 .. $k-1){
			$P[$i] += $mat[$i][$j]*$mat[$i][$j];
		}
		$P[$i] = ($P[$i] - $n) / ($n * ($n - 1));
	}
	print "P = [".join(", ", @P)."]\n";

	# Pbar : mean of the P[i]
	$Pbar = 0;
	$Pbar += $_ for @P;
	$Pbar /= $N;
	print "Pbar = ".$Pbar."\n";

	# PbarE : agreement expected by chance
	$PbarE = 0;
	$PbarE += $_ * $_ for @p;
	print "PbarE = ".$PbarE."\n";

	$kappa = ($Pbar - $PbarE)/(1 - $PbarE);
	print "kappa = ".$kappa."\n";
	return $kappa;
}
 
#####
# Assert that each line has a constant number of ratings
# The lines are read from the package variable @mat; arguments passed in the
# call are not used. Every line is checked, and the sub dies on the first line
# whose total differs from the total of the first line.
# @return The number of ratings (0 when @mat is empty)
#####

sub checkEachLineCount($){
	$n = 0;
	my $firstLine = 1;
	for my $line (@mat){
		my $count = 0;
		$count += $_ for @{ $line };
		if($firstLine){
			$n = $count;
			$firstLine = 0;
		}
		if($n != $count){
			die "Line count != ".$n." (n value).\n";
		}
	}
	# returned only after the loop, so that all lines have been compared
	return $n;
}


/**
 * Computes Fleiss' kappa for a table of classification counts.
 *
 * Adapted from the example in en.wikipedia.org/wiki/Fleiss'_kappa
 *
 * Invalid input is reported with an exception instead of print + exit(0),
 * so the calling script is not terminated with a success status.
 *
 * @param array $table 0-indexed list of subjects; each entry is a 0-indexed
 *                     list holding, per class, the number of raters who chose it
 * @return float the kappa value; NAN when every rating falls into one single
 *               class (expected agreement is 1, so kappa is 0/0)
 * @throws \InvalidArgumentException if the table is empty, the subjects differ
 *                                   in number of classes or raters, or there
 *                                   are fewer than 2 raters
 */
function fleissKappa($table){

    if(!is_array($table) || !isset($table[0]) || !is_array($table[0]) || count($table[0]) == 0){
        throw new \InvalidArgumentException("the table must contain at least one subject and one class.");
    }

    $subjects = count($table);
    $classes = count($table[0]);
    $raters = array_sum($table[0]);

    for($q = 1; $q < $subjects; $q++){

        if(!isset($table[$q]) || !is_array($table[$q]) || count($table[$q])!=$classes){
            throw new \InvalidArgumentException("no equal number of classes.");
        }

        if(array_sum($table[$q])!=$raters){
            throw new \InvalidArgumentException("no equal number of raters.");
        }

    }

    // the per-subject agreement divides by raters*(raters-1)
    if($raters < 2){
        throw new \InvalidArgumentException("at least 2 raters are required.");
    }

    $pj = array();                          // share of all ratings per class
    $pi = array_fill(0, $subjects, 0);      // sum of squared counts per subject

    for($i = 0; $i < $classes; $i++){

        $tpj = 0;

        for($j = 0; $j < $subjects; $j++){

            $tpj += $table[$j][$i];
            $pi[$j] += $table[$j][$i]*$table[$j][$i];

        }

        $pj[$i] = $tpj/($raters*$subjects);

    }

    // turn the sums of squares into the extent of agreement per subject
    for($j = 0; $j < $subjects; $j++){
        $pi[$j] = ($pi[$j]-$raters)/($raters*($raters-1));
    }

    $pcarret = array_sum($pi)/$subjects;    // mean observed agreement
    $pecarret = 0;                          // agreement expected by chance

    foreach($pj as $share){
        $pecarret += $share*$share;
    }

    // all ratings in one class: avoid dividing by zero, kappa is undefined
    if($pecarret == 1){
        return NAN;
    }

    $kappa = ($pcarret-$pecarret)/(1-$pecarret);

    return $kappa;

}

Scala

[edit | edit source]
/** Computes Fleiss' kappa for a matrix of rating counts.
  *
  * All derived values are computed once, at construction time.
  *
  * @param elems outer list of subjects, inner list of categories; each cell
  *              holds the number of ratings the subject received in that category
  * @throws IllegalArgumentException if the rows differ in length or in their
  *                                  total number of ratings
  */
class Kappa(elems: List[List[Double]]) {

  /** Number of subjects (rows). */
  val N: Int = elems.length

  /** Number of categories (columns); 0 for an empty matrix. */
  val k: Int = elems.headOption.map(_.length).getOrElse(0)

  /** Number of ratings per subject, taken from the first row; 0 for an empty matrix. */
  val n: Double = elems.headOption.map(_.sum).getOrElse(0.0)

  // Require all lists be the same length
  require(elems.forall(row => row.length == k), "every subject must have the same number of categories")
  // The formulas below use a single n, so every subject must carry n ratings
  require(elems.forall(row => row.sum == n), s"every subject must have $n ratings")

  /** Share of all ratings that went to each category (one value per column). */
  val lp: List[Double] = elems.transpose.map(column => column.sum / (n * N))

  /** Extent of agreement among the raters, one value per subject. */
  val bp: List[Double] = elems.map(row => calc(sumOfSquares(row)))

  /** Mean observed agreement; NaN for an empty matrix. */
  val pBar: Double = bp.sum / N

  /** Agreement expected by chance. */
  val pBarE: Double = sumOfSquares(lp)

  private def sumOfSquares(values: List[Double]): Double =
    values.foldLeft(0.0)((acc, value) => acc + value * value)

  /** Turns a subject's sum of squared counts into its extent of agreement.
    * Divides by n * (n - 1), so the result is NaN or infinite when n is 0 or 1.
    */
  def calc(x: Double): Double = (x - n) / (n * (n - 1))

  /** Prints the intermediate values and returns kappa.
    *
    * @return the kappa value; NaN when it is undefined (for example when every
    *         rating falls into one single category)
    */
  def kappa(): Double = {
    println(s"$n raters")
    println(s"$N subjects")
    println(s"$k categories")
    println(s"pBar $pBar")
    println(s"pBarE $pBarE")
    val result = (pBar - pBarE) / (1.0 - pBarE)
    println(s"Kappa $result")
    result
  }

}

/** Command-line entry point running the computation on a fixed example data set. */
object Kappa {

  /** Builds a [[Kappa]] for the example ratings (10 subjects, 5 categories) and prints its values.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    val example = new Kappa(List(
      List(0.0, 0.0, 0.0, 0.0, 14.0),
      List(0.0, 2.0, 6.0, 4.0, 2.0),
      List(0.0, 0.0, 3.0, 5.0, 6.0),
      List(0.0, 3.0, 9.0, 2.0, 0.0),
      List(2.0, 2.0, 8.0, 1.0, 1.0),
      List(7.0, 7.0, 0.0, 0.0, 0.0),
      List(3.0, 2.0, 6.0, 3.0, 0.0),
      List(2.0, 5.0, 3.0, 2.0, 2.0),
      List(6.0, 5.0, 2.0, 1.0, 0.0),
      List(0.0, 2.0, 2.0, 3.0, 7.0)))

    // kappa() prints the intermediate values and the result itself
    example.kappa()
  }
}