Algorithm Implementation/Pseudorandom Numbers/Chi-Square Test

From Wikibooks, open books for an open world
Jump to: navigation, search

VB.NET[edit]

The following is a program to calculate the chi-square value for N positive integers less than r.

    'Calculates the chi-square value for N positive integers less than r
    'Source: "Algorithms in C" - Robert Sedgewick - pp. 517
    'NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
    'since it could be wrong in about one out of ten times."
    Public Function IsChiRandom(ByVal randomNums() As Integer, ByVal r As Integer) As Boolean
 
        'Calculate the number of samples - N
        Dim N As Integer = randomNums.Length
 
        'According to Sedgewick: "This is valid if N is greater than about 10r"
        If N <= (10 * r) Then
            Return False
        End If
 
        Dim N_r As Double = N / r
        Dim HT As Hashtable
        Dim chi_square As Double = 0
 
        'PART A: Get frequency of randoms
        HT = Me.RandomFrequency(randomNums)
 
        'PART B: Calculate chi-square - this approach is in Sedgewick
        Dim f As Integer
        For Each Item As DictionaryEntry In HT
            f = Integer.Parse(Item.Value) - N_r
            chi_square += Math.Pow(f, 2)
        Next
        chi_square = chi_square / N_r
 
        'PART C: According to Sedgewick: "The statistic should be within 2(r)^1/2 of r
        'This is valid if N is greater than about 10r"
 
        If (r - 2 * Math.Sqrt(r) <= chi_square) And (r + 2 * Math.Sqrt(r) >= chi_square) Then
            Return True
        Else
            Return False
        End If
 
    End Function
 
    'Gets the frequency of occurrence of a randomly generated array of integers
    'Output: A hashtable, key being the random number and value its frequency
    Private Function RandomFrequency(ByVal randomNums() As Integer) As Hashtable
 
        Dim HT As New Hashtable
        Dim N As Integer = randomNums.Length
 
        Dim count As Integer = 0
        For i As Integer = 0 To N - 1
            If HT.ContainsKey(randomNums(i)) Then
                HT.Item(randomNums(i)) = HT.Item(randomNums(i)) + 1
            Else
                HT.Add(randomNums(i), 1)
            End If
        Next
 
        Return HT
 
    End Function

C#[edit]

	// Calculates the chi-square value for N positive integers less than r
	// Source: "Algorithms in C" - Robert Sedgewick - pp. 517
	// NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
	// since it could be wrong in about one out of ten times."
	public bool IsRandom(int[] randomNums, int r)
	{
		//Calculate the number of samples - N
		int N = randomNums.Length;
 
		//According to Sedgewick: "This is valid if N is greater than about 10r"
		if (N <= 10*r)
			return false;
 
		double N_r = N / r;
		double chi_square = 0;
		Hashtable HT;
 
		//PART A: Get frequency of randoms
		HT = this.RandomFrequency (randomNums);
 
		//PART B: Calculate chi-square - this approach is in Sedgewick
		double f;
		foreach (DictionaryEntry Item in HT)
		{
			f = (int)(Item.Value) - N_r;
			chi_square += Math.Pow(f, 2);
		}
		chi_square = chi_square / N_r;
 
		//PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
		//This is valid if N is greater than about 10r"
		if((r - 2 * Math.Sqrt (r) <= chi_square) &  (r + 2 * Math.Sqrt (r) >= chi_square))
			return true;
		else
			return false;
	}
 
	//Gets the frequency of occurrence of a randomly generated array of integers
	//Output: A hashtable, key being the random number and value its frequency
	private Hashtable RandomFrequency(int[] randomNums)
	{
		Hashtable HT = new Hashtable();
		int N = randomNums.Length;
 
		for(int i = 0; i <= N-1; i++)
		{
			if (HT.ContainsKey(randomNums[i]))
				HT[randomNums[i]] = (int) HT[randomNums[i]] + 1;
			else
				HT[randomNums[i]] = 1;
		}
 
		return HT;
	}

Java[edit]

	/**
	 * Calculates the chi-square value for N positive integers less than r
	 * Source: "Algorithms in C" - Robert Sedgewick - pp. 517
	 * NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
	 * since it could be wrong in about one out of ten times."
	 * @param  randomNums  a uniformly-randomly-generated array of integers
	 * @param  r           upper bound for the random range
	 * @return             whether it is likely to be uniformly randomly generated
	 */
	public static boolean isRandom(int[] randomNums, int r)
	{
		//According to Sedgewick: "This is valid if N is greater than about 10r"
		if (randomNums.length <= 10 * r)
			return false;
 
		//PART A: Get frequency of randoms
		Map<Integer,Integer> ht = getFrequencies(randomNums);
 
		//PART B: Calculate chi-square - this approach is in Sedgewick
		double n_r = (double)randomNums.length / r;
		double chiSquare = 0;
 
		for (int v : ht.values())
		{
			double f = v - n_r;
			chiSquare += f * f;
		}
		chiSquare /= n_r;
 
		//PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
		//This is valid if N is greater than about 10r"
		return Math.abs(chiSquare - r) <= 2 * Math.sqrt(r);
	}
 
	/**
	 * @param  nums  an array of integers
	 * @return       a Map, key being the number and value its frequency
	 */
	private static Map<Integer,Integer> getFrequencies(int[] nums)
	{
		Map<Integer,Integer> freqs = new HashMap<Integer,Integer>();
 
		for (int x : nums)
		{
			if (freqs.containsKey(x))
				freqs.put(x, freqs.get(x) + 1);
			else
				freqs.put(x, 1);
		}
 
		return freqs;
	}

Python[edit]

import math # for sqrt
 
def isRandom(randomNums, r):
	""" Calculates the chi-square value for N positive integers less than r
	    Source: "Algorithms in C" - Robert Sedgewick - pp. 517
	    NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
	    since it could be wrong in about one out of ten times." """
	#Calculate the number of samples - N
	N = len(randomNums)
 
	#According to Sedgewick: "This is valid if N is greater than about 10r"
	if N <= 10 * r:
		return False
 
	N_r = N / r
	chi_square = 0.0
 
	#PART A: Get frequency of randoms
	HT = getFrequencies(randomNums)
 
	#PART B: Calculate chi-square - this approach is in Sedgewick
	for v in HT.itervalues():
		f = v - N_r
		chi_square += f * f
 
	chi_square /= N_r
 
	#PART C: According to Sedgewick: "The statistic should be within 2(r)^1/2 of r
	#This is valid if N is greater than about 10r"
	return abs(chi_square - r) <= 2 * math.sqrt(r)
 
def getFrequencies(nums):
	""" Gets the frequency of occurrence of a randomly generated array of integers
	    Output: A dictionary, key being the random number and value its frequency """
	freqs = {}
 
	for x in nums:
		if x in freqs:
			freqs[x] += 1
		else:
			freqs[x] = 1
 
	return freqs