Skip to content

Commit 1a55dc8

Browse files
committed
Update: dof documentation
1 parent 61e2a17 commit 1a55dc8

File tree

3 files changed

+11
-3
lines changed

3 files changed

+11
-3
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="principal-feature-analysis",
8-
version="1.0.7",
8+
version="1.0.8",
99
author="Tim Breitenbach & Lauritz Rasbach",
1010
1111
description="The first package for Principal Feature Analysis",

src/principal_feature_analysis/find_relevant_principal_features.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,11 @@ def find_relevant_principal_features(data,number_output_functions,cluster_size,a
103103
if sum(expfreq.flatten() < 1) > 0:
104104
counter_bins_less_than1_relevant_principal_features += 1
105105
pv = scipy.stats.chisquare(freq_data_product.flatten(), expfreq.flatten(),ddof=(freq_data_product.shape[0]-1)+(freq_data_product.shape[1]-1))[1]
106-
# ddof=-1 to have the degrees of freedom of the chi square eaual the number of bins, see corresponding paper (Appendix) for details
106+
# According to the documentation of scipy.stats.chisquare, the degrees of freedom are k-1-ddof, where ddof=0 by default and k is the number of observed frequencies, here k=freq_data_product.shape[0]*freq_data_product.shape[1].
107+
# According to the literature, the chi-square test statistic for a test of independence (r x m contingency table) is approximately chi-square distributed (under some assumptions) with degrees of freedom equal to
108+
# (freq_data_product.shape[0]-1)*(freq_data_product.shape[1]-1) = freq_data_product.shape[0]*freq_data_product.shape[1] - freq_data_product.shape[0] - freq_data_product.shape[1] + 1.
109+
# Consequently, ddof is set equal to (freq_data_product.shape[0]-1)+(freq_data_product.shape[1]-1) to adjust the degrees of freedom accordingly.
110+
107111
# if p-value pv is less than alpha the hypothesis that j is independent of the output function is rejected
108112
if pv <= alpha:
109113
dependent=1

src/principal_feature_analysis/principal_feature_analysis.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ def principal_feature_analysis(cluster_size,data,number_output_functions,freq_da
5757
if sum(expfreq.flatten() < 1) > 0:
5858
counter_bin_less_than1 += 1
5959
pv = scipy.stats.chisquare(freq_data_product.flatten(), expfreq.flatten(),ddof=(freq_data_product.shape[0]-1)+(freq_data_product.shape[1]-1))[1]
60-
# ddof=-1 to have the degrees of freedom of the chi square eaual the number of bins, see corresponding paper (Appendix) for details
60+
# According to the documentation of scipy.stats.chisquare, the degrees of freedom are k-1-ddof, where ddof=0 by default and k is the number of observed frequencies, here k=freq_data_product.shape[0]*freq_data_product.shape[1].
61+
# According to the literature, the chi-square test statistic for a test of independence (r x m contingency table) is approximately chi-square distributed (under some assumptions) with degrees of freedom equal to
62+
# (freq_data_product.shape[0]-1)*(freq_data_product.shape[1]-1) = freq_data_product.shape[0]*freq_data_product.shape[1] - freq_data_product.shape[0] - freq_data_product.shape[1] + 1.
63+
# Consequently, ddof is set equal to (freq_data_product.shape[0]-1)+(freq_data_product.shape[1]-1) to adjust the degrees of freedom accordingly.
64+
6165
# if p-value pv is less than alpha the hypothesis that j is independent of i is rejected
6266
if pv <= alpha:
6367
global_adjm[cluster[i], cluster[j] ] = 1

0 commit comments

Comments
 (0)