Algorithm Implementation/Pseudorandom Numbers/Chi-Square Test

From Wikibooks, open books for an open world
Jump to: navigation, search

Contents


VB.NET [edit]

The following is a program to calculate the chi-square value for N positive integers less than r.

    'Calculates the chi-square value for N positive integers less than r
    'Source: "Algorithms in C" - Robert Sedgewick - pp. 517
    'NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
    'since it could be wrong in about one out of ten times."
    Public Function IsChiRandom(ByVal randomNums() As Integer, ByVal r As Integer) As Boolean
 
        'Calculate the number of samples - N
        Dim N As Integer = randomNums.Length
 
        'According to Sedgewick: "This is valid if N is greater than about 10r"
        If N <= (10 * r) Then
            Return False
        End If
 
        Dim N_r As Double = N / r
        Dim HT As Hashtable
        Dim chi_square As Double = 0
 
        'PART A: Get frequency of randoms
        HT = Me.RandomFrequency(randomNums)
 
        'PART B: Calculate chi-square - this approach is in Sedgewick
        Dim f As Integer
        For Each Item As DictionaryEntry In HT
            f = Integer.Parse(Item.Value) - N_r
            chi_square += Math.Pow(f, 2)
        Next
        chi_square = chi_square / N_r
 
        'PART C: According to Sedgewick: "The statistic should be within 2(r)^1/2 of r
        'This is valid if N is greater than about 10r"
 
        If (r - 2 * Math.Sqrt(r) <= chi_square) And (r + 2 * Math.Sqrt(r) >= chi_square) Then
            Return True
        Else
            Return False
        End If
 
    End Function
 
    'Gets the frequency of occurrence of a randomly generated array of integers
    'Output: A hashtable, key being the random number and value its frequency
    Private Function RandomFrequency(ByVal randomNums() As Integer) As Hashtable
 
        Dim HT As New Hashtable
        Dim N As Integer = randomNums.Length
 
        Dim count As Integer = 0
        For i As Integer = 0 To N - 1
            If HT.ContainsKey(randomNums(i)) Then
                HT.Item(randomNums(i)) = HT.Item(randomNums(i)) + 1
            Else
                HT.Add(randomNums(i), 1)
            End If
        Next
 
        Return HT
 
    End Function

C# [edit]

        // Calculates the chi-square value for N positive integers less than r
        // Source: "Algorithms in C" - Robert Sedgewick - pp. 517
        // NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
        // since it could be wrong in about one out of ten times."
        public bool IsRandom(int[] randomNums, int r)
        {
                //Calculate the number of samples - N
                int N = randomNums.Length;
 
                //According to Sedgewick: "This is valid if N is greater than about 10r"
                if (N <= 10*r)
                        return false;
 
                double N_r = N / r;
                double chi_square = 0;
                Hashtable HT;
 
                //PART A: Get frequency of randoms
                HT = this.RandomFrequency (randomNums);
 
                //PART B: Calculate chi-square - this approach is in Sedgewick
                double f;
                foreach (DictionaryEntry Item in HT)
                {
                        f = (int)(Item.Value) - N_r;
                        chi_square += Math.Pow(f, 2);
                }
                chi_square = chi_square / N_r;
 
                //PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
                //This is valid if N is greater than about 10r"
                if((r - 2 * Math.Sqrt (r) <= chi_square) &  (r + 2 * Math.Sqrt (r) >= chi_square))
                        return true;
                else
                        return false;
        }
 
        //Gets the frequency of occurrence of a randomly generated array of integers
        //Output: A hashtable, key being the random number and value its frequency
        private Hashtable RandomFrequency(int[] randomNums)
        {
                Hashtable HT = new Hashtable();
                int N = randomNums.Length;
 
                for(int i = 0; i <= N-1; i++)
                {
                        if (HT.ContainsKey(randomNums[i]))
                                HT[randomNums[i]] = (int) HT[randomNums[i]] + 1;
                        else
                                HT[randomNums[i]] = 1;
                }
 
                return HT;
        }

Java [edit]

        /**
         * Calculates the chi-square value for N positive integers less than r
         * Source: "Algorithms in C" - Robert Sedgewick - pp. 517
         * NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
         * since it could be wrong in about one out of ten times."
         * @param  randomNums  a uniformly-randomly-generated array of integers
         * @param  r           upper bound for the random range
         * @return             whether it is likely to be uniformly randomly generated
         */
        public static boolean isRandom(int[] randomNums, int r)
        {
                //According to Sedgewick: "This is valid if N is greater than about 10r"
                if (randomNums.length <= 10 * r)
                        return false;
 
                //PART A: Get frequency of randoms
                Map<Integer,Integer> ht = getFrequencies(randomNums);
 
                //PART B: Calculate chi-square - this approach is in Sedgewick
                double n_r = (double)randomNums.length / r;
                double chiSquare = 0;
 
                for (int v : ht.values())
                {
                        double f = v - n_r;
                        chiSquare += f * f;
                }
                chiSquare /= n_r;
 
                //PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
                //This is valid if N is greater than about 10r"
                return Math.abs(chiSquare - r) <= 2 * Math.sqrt(r);
        }
 
        /**
         * @param  nums  an array of integers
         * @return       a Map, key being the number and value its frequency
         */
        private static Map<Integer,Integer> getFrequencies(int[] nums)
        {
                Map<Integer,Integer> freqs = new HashMap<Integer,Integer>();
 
                for (int x : nums)
                {
                        if (freqs.containsKey(x))
                                freqs.put(x, freqs.get(x) + 1);
                        else
                                freqs.put(x, 1);
                }
 
                return freqs;
        }

Python [edit]

import math # for sqrt
 
def isRandom(randomNums, r):
        """ Calculates the chi-square value for N positive integers less than r
            Source: "Algorithms in C" - Robert Sedgewick - pp. 517
            NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
            since it could be wrong in about one out of ten times." """
        #Calculate the number of samples - N
        N = len(randomNums)
 
        #According to Sedgewick: "This is valid if N is greater than about 10r"
        if N <= 10 * r:
                return False
 
        N_r = N / r
        chi_square = 0.0
 
        #PART A: Get frequency of randoms
        HT = getFrequencies(randomNums)
 
        #PART B: Calculate chi-square - this approach is in Sedgewick
        for v in HT.itervalues():
                f = v - N_r
                chi_square += f * f
 
        chi_square /= N_r
 
        #PART C: According to Sedgewick: "The statistic should be within 2(r)^1/2 of r
        #This is valid if N is greater than about 10r"
        return abs(chi_square - r) <= 2 * math.sqrt(r)
 
def getFrequencies(nums):
        """ Gets the frequency of occurrence of a randomly generated array of integers
            Output: A dictionary, key being the random number and value its frequency """
        freqs = {}
 
        for x in nums:
                if x in freqs:
                        freqs[x] += 1
                else:
                        freqs[x] = 1
 
        return freqs