stats/tests

Essential Tests

Core statistical tests.

Correlation test


@dataclass
class cortest_Result:
    """Result of a correlation test, as returned by cor_test."""

    # Correlation coefficient.
    coeff: float

    # Confidence interval of the coefficient, as (lower, upper).
    ci: tuple[float, float]

def cor_test(
	x:Iterable, 
	y:Iterable,
	conflevel=0.95,
	method="pearson")->cortest_Result:
"""
Computes the correlation between x and y.  

Returns a cortest_Result holding the correlation coefficient 
and its confidence interval.  

x, y: x and y data  
conflevel: Confidence level, [0,1]  
method: correlation method, "pearson" or "spearman"
"""

Example


from scisuit.stats import cor_test

# Each inner list represents one column of data
data = [
    [500, 760, 320, 1000],
    [67, 43, 20, 45],
    [39.4, 25.3, 30, 22],
]
conflevel = 0.95
method = "pearson"

# Every column paired with each of the columns that precede it
pairs = [(i, j) for i in range(len(data)) for j in range(i)]

for i, j in pairs:
    print(f"Col {i+1} & Col {j+1}")
    print(cor_test(data[i], data[j], conflevel=conflevel, method=method))

Col 2 & Col 1
Correlation coefficient = 0.2742
CI = (-0.9327, 0.9776)

Col 3 & Col 1
Correlation coefficient = -0.7020
CI = (-0.9931, 0.7964)

Col 3 & Col 2
Correlation coefficient = 0.4865
CI = (-0.8914, 0.9864)

F test

Tests the ratio of the variances of two samples. If the assumed ratio is 1, the test is equivalent to testing the equality of the two variances.


def test_f(
	x:Iterable, 
	y:Iterable, 
	ratio:float = 1.0, 
	alternative:str = "two.sided", 
	conflevel:float = 0.95)->tuple[float, test_f_Result]:
"""
Tests the ratio of the variances of two samples.

Returns a tuple of the p-value and a test_f_Result instance.

## Input
x/y: First/second sample, ndarray/list
ratio: Assumed ratio of variances of the samples
alternative: "two.sided", "less", "greater"
conflevel: Confidence level, [0,1] 
"""


@dataclass
class test_f_Result:
    """Details of an F test, returned by test_f alongside the p-value."""

    Fcritical: float

    # Degrees of freedom (presumably of the first and second sample -- confirm).
    df1: int
    df2: int

    # Variances (presumably of the first and second sample -- confirm).
    var1: float
    var2: float

    # Lower and upper bounds of the confidence interval.
    CI_lower: float
    CI_upper: float

Poisson 1-Sample

Available since v1.8.


class test_poisson1sample_Result:
   _method:str
   _hypotest:bool
   pvalue: float | None
   zvalue: float | None
   mean: float
   ci : tuple[float, float]
   N: int
   TotalOccurences: int


def test_poisson1sample(
	sample: Iterable[int] | None = None,
	frequency: Iterable[int] | None = None,
	samplesize: int | None = None,
	totaloccur: int | None = None,
	length:numbers.Real = 1,
	hypotest = False,
	hyporate:numbers.Real = 0.0,
	conflevel:float = 0.95,
	method = "normal",
	alternative = "two.sided")->test_poisson1sample_Result:
"""
One-sample Poisson test.  
Either sample (optionally with frequency) or summarized data 
(samplesize and totaloccur) must be provided.  

----
Samples known  
sample: Sample data  
frequency: (Optional) Number of occurrences of each value in sample  

----
Summarized Data  
samplesize: Size of the sample  
totaloccur: Total number of occurrences (generally the sum of sample[i] * frequency[i])

----
length: Length of observation (time, area, etc.)  
hypotest: Whether a hypothesis test should be performed  
hyporate: Hypothesized rate  
conflevel: Confidence level, [0,1]  
method: "normal" or "exact"  
alternative: "two.sided", "less" or "greater"
"""

Example #1


from scisuit.stats import test_poisson1sample
# Observed values and the number of occurrences of each value
Sample = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Freq = [57, 203, 383, 525, 532, 408, 273, 139, 45, 27, 10, 6]

# Test settings, gathered in one place
options = dict(
    length=1.0,
    hypotest=True,
    hyporate=4.0,
    conflevel=0.95,
    method="normal",
    alternative="two.sided",
)

result = test_poisson1sample(sample=Sample, frequency=Freq, **options)
print(result)

One Sample Poisson Test (normal) 
N = 2608, Total Occurences = 10092 
Mean = 3.8696 
CI = (3.794, 3.945)
p-value = 0.000872, z-value = -3.329

Example #2


from scisuit.stats import test_poisson1sample

# Summarized data is passed instead of the raw sample
summary = dict(samplesize=2608, totaloccur=10092)

# Test settings, gathered in one place
options = dict(
    length=1.0,
    hypotest=True,
    hyporate=4.0,
    conflevel=0.95,
    method="normal",
    alternative="two.sided",
)

result = test_poisson1sample(**summary, **options)
print(result)

One Sample Poisson Test (normal) 
N = 2608, Total Occurences = 10092 
Mean = 3.8696 
CI = (3.794, 3.945)
p-value = 0.000872, z-value = -3.329

t test

Performs 1-sample, 2-sample and paired t-test.


def test_t(
    x: Iterable,
    y: Iterable | None = None,
    varequal=True,
    alternative="two.sided",
    mu: float = 0.0,
    conflevel=0.95,
    paired=False,
):
    """
    Performs a 1-sample, 2-sample or paired t-test.

    The test is chosen from the arguments: 1-sample when y is None,
    paired when y is given and paired is true, 2-sample otherwise.

    x, y: First and second samples
    varequal: assuming equal variances (passed to the 2-sample test only)
    alternative: 'two.sided', 'less' or 'greater'
    mu: Assumed difference between samples or assumed mean
    conflevel: Confidence level, [0,1]
    paired: For paired t-test

    Returns whatever the selected test returns.
    """
    # Identity check on purpose: `y == None` is evaluated elementwise when y
    # is an ndarray, and using that result in an `if` raises ValueError.
    if y is None:
        return _test_t1(x=x, mu=mu, alternative=alternative, conflevel=conflevel)

    if paired:
        return _test_t_paired(x=x, y=y, mu=mu, alternative=alternative, conflevel=conflevel)

    return _test_t2(x=x, y=y, mu=mu, varequal=varequal, alternative=alternative, conflevel=conflevel)

Z test


def test_z(
    x: Iterable,
    sd1: Real,
    mu: Real,
    y: Iterable | None = None,
    sd2: Real | None = None,
    alternative="two.sided",
    conflevel=0.95,
) -> test_z1_Result | test_z2_Result:
    """
    Performs a 1-sample or 2-sample z-test.

    The 2-sample test is run when y is given, the 1-sample test otherwise.

    x, y: Iterable
    sd1, sd2: Standard deviations of populations
    mu: Assumed difference between means of populations
    alternative: "two.sided", "less", "greater"
    conflevel: Confidence level, (0,1)

    Returns the result of test_z2 when y is given, otherwise of test_z1.
    """
    # Identity check on purpose: `y != None` is evaluated elementwise when y
    # is an ndarray, and using that result in an `if` raises ValueError.
    if y is not None:
        return test_z2(x=x, y=y, sd1=sd1, sd2=sd2, mu=mu, alternative=alternative, conflevel=conflevel)

    return test_z1(x=x, sd=sd1, mu=mu, alternative=alternative, conflevel=conflevel)

Example


from scisuit.stats import test_z

# Sample data, 10 values per row
sample = [
    141, 146, 144, 141, 141, 136, 137, 149, 141, 142,
    142, 147, 148, 155, 150, 144, 140, 140, 139, 148,
    143, 143, 149, 140, 132, 158, 149, 144, 145, 146,
    143, 135, 147, 153, 142, 142, 138, 150, 145, 126,
]

# Only x, sd1 and mu are given, so no second sample is involved
result = test_z(x=sample, sd1=6, mu=132.4)
print(result)

N=40, mean=143.525, Z=11.727 
p-value = 4.6e-32 (two.sided)
Confidence interval (141.666, 145.384)