nfusi · ncchung · Jan 27, 2025 · Jan 27, 2025 · Jan 27, 2025
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "githubPullRequests.ignoredPullRequestBranches": [
+        "master"
+    ]
+}
diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,7 @@
+# qvalue-python 0.2 (2024-01-27)
+
+* Forked https://github.com/nfusi/qvalue
+* Fixed errors due to deprecated scipy functions
+* Separate the pi0 estimation function
+* Add a quick plotting function
+* Added this `NEWS.md` file to track changes to the package.
diff --git a/README.md b/README.md
@@ -1,24 +1,48 @@
-qvalue
-======
+# Converts p-values to q-values
 
-Converts p-values in q-values, see (Storey and Tibshirani, 2003)
+This Python package implements qvalue and pi0 estimation from Storey and Tibshirani (2003).
 
-The only function that needs to be called is 'estimate()'. 
+The only function that needs to be called is 'qvalue()'. 
 It will accept a numpy array of pvalues and will return a numpy array of qvalues.
 For instance, given some uniform p-values:
 
-    import numpy as np 
-    import qvalue
+```Python
+import numpy as np 
+from qvalue.qvalue import qvalue, pi0est, plot_qvalue
 
-    pv = np.random.uniform(0.0, 1.0, size = (1000,))
+pv = np.random.uniform(0.0, 1.0, size = (1000,))
+```
 
 it's possible to convert them in q-values by calling
 
-     qv = qvalue.estimate(pv)
+```Python
+qv, pi0 = qvalue(pv)
+```
 
-estimate() also includes a memory-efficient (but not CPU-efficient) procedure for the case
+qvalue() also includes a memory-efficient (but not CPU-efficient) procedure for the case
 when it's not possible to store both the p-values and the q-values in memory at the same time.
-In addition it's possible to change the number of hypotheses tested or some parameters of the
-procedure (like pi0).
+In addition it's possible to change the number of hypotheses tested (m) or the proportion of
+null hypotheses (pi0).
 
-We refer to the documentation (in ipython, 'qvalue.estimate?') for more informations.
+pi0() estimates the proportion of null hypotheses:
+
+```Python
+pi0 = pi0est(pv)
+```
+
+Visualize the q-value and p-values with:
+
+```Python
+plot_qvalue(qv,pv)
+```
+
+## Installation
+
+Install this repo:
+```
+pip install --upgrade https://github.com/ncchung/qvalue-python/tarball/master
+```
+
+## References
+
+Storey, J. D., & Tibshirani, R. (2003). Statistical significance for genomewide studies. Proceedings of the National Academy of Sciences, 100(16), 9440-9445.
diff --git a/qvalue/__init__.py b/qvalue/__init__.py
@@ -1 +0,0 @@
-from qvalue import *

diff --git a/qvalue/qvalue.py b/qvalue/qvalue.py
@@ -1,8 +1,9 @@
 import scipy as sp
 from scipy import interpolate
+import numpy as np
+import matplotlib.pyplot as plt
 
-
-def estimate(pv, m=None, verbose=False, lowmem=False, pi0=None):
+def qvalue(pv, pi0=None, m=None, lowmem=False, verbose=False, plot=False):
     """
     Estimates q-values from p-values
 
@@ -34,58 +35,118 @@ def estimate(pv, m=None, verbose=False, lowmem=False, pi0=None):
     elif pi0 is not None:
         pi0 = pi0
     else:
-        # evaluate pi0 for different lambdas
-        pi0 = []
-        lam = sp.arange(0, 0.90, 0.01)
-        counts = sp.array([(pv > i).sum() for i in sp.arange(0, 0.9, 0.01)])
-        for l in range(len(lam)):
-            pi0.append(counts[l]/(m*(1-lam[l])))
-
-        pi0 = sp.array(pi0)
-
-        # fit natural cubic spline
-        tck = interpolate.splrep(lam, pi0, k=3)
-        pi0 = interpolate.splev(lam[-1], tck)
-        if verbose:
-            print("qvalues pi0=%.3f, estimated proportion of null features " % pi0)
-
-        if pi0 > 1:
-            if verbose:
-                print("got pi0 > 1 (%.3f) while estimating qvalues, setting it to 1" % pi0)
-            pi0 = 1.0
-
-    assert(pi0 >= 0 and pi0 <= 1), "pi0 is not between 0 and 1: %f" % pi0
+        pi0 = pi0est(pv=pv, m=m, verbose=verbose)
 
     if lowmem:
         # low memory version, only uses 1 pv and 1 qv matrices
-        qv = sp.zeros((len(pv),))
+        qv = np.zeros((len(pv),))
         last_pv = pv.argmax()
         qv[last_pv] = (pi0*pv[last_pv]*m)/float(m)
-        pv[last_pv] = -sp.inf
+        pv[last_pv] = -np.inf
         prev_qv = last_pv
-        for i in xrange(int(len(pv))-2, -1, -1):
+        for i in range(int(len(pv))-2, -1, -1):
             cur_max = pv.argmax()
             qv_i = (pi0*m*pv[cur_max]/float(i+1))
-            pv[cur_max] = -sp.inf
+            pv[cur_max] = -np.inf
             qv_i1 = prev_qv
             qv[cur_max] = min(qv_i, qv_i1)
             prev_qv = qv[cur_max]
-
     else:
-        p_ordered = sp.argsort(pv)
+        p_ordered = np.argsort(pv)
         pv = pv[p_ordered]
         qv = pi0 * m/len(pv) * pv
         qv[-1] = min(qv[-1], 1.0)
 
-        for i in xrange(len(pv)-2, -1, -1):
+        for i in range(len(pv)-2, -1, -1):
             qv[i] = min(pi0*m*pv[i]/(i+1.0), qv[i+1])
 
         # reorder qvalues
         qv_temp = qv.copy()
-        qv = sp.zeros_like(qv)
+        qv = np.zeros_like(qv)
         qv[p_ordered] = qv_temp
 
+    if plot:
+        plot_qvalue(qv, pv)
+
     # reshape qvalues
     qv = qv.reshape(original_shape)
 
-    return qv
+    return qv, pi0
+
+def pi0est(pv, m=None, verbose=False):
+    """
+    Estimates pi0 from p-values
+
+    Args
+    =====
+
+    m: number of tests. If not specified m = pv.size
+    verbose: print verbose messages? (default False)
+    """
+    assert(pv.min() >= 0 and pv.max() <= 1), "p-values should be between 0 and 1"
+
+    original_shape = pv.shape
+    pv = pv.ravel()  # flattens the array in place, more efficient than flatten()
+
+    if m is None:
+        m = float(len(pv))
+    else:
+        # the user has supplied an m
+        m *= 1.0
+
+    # evaluate pi0 for different lambdas
+    pi0 = []
+    lam = np.arange(0, 0.90, 0.01)
+    counts = np.array([(pv > i).sum() for i in np.arange(0, 0.9, 0.01)])
+    for l in range(len(lam)):
+        pi0.append(counts[l]/(m*(1-lam[l])))
+
+    pi0 = np.array(pi0)
+
+    # fit natural cubic spline
+    tck = interpolate.splrep(lam, pi0, k=3)
+    pi0 = interpolate.splev(lam[-1], tck)
+    if verbose:
+        print("qvalues pi0=%.3f, estimated proportion of null features " % pi0)
+
+    if pi0 > 1:
+        if verbose:
+            print("got pi0 > 1 (%.3f) while estimating qvalues, setting it to 1" % pi0)
+        pi0 = 1.0
+
+    assert(pi0 >= 0 and pi0 <= 1), "pi0 is not between 0 and 1: %f" % pi0
+
+    return pi0
+
+def plot_qvalue(qv, pv):
+    """
+    Visualize the q-value results
+
+    Args
+    =====
+    qv: q-values
+    pv: p-values
+    """
+    qv_thresholds = np.linspace(0, 1, 100)
+    num_significant_tests = [np.sum(qv <= threshold) for threshold in qv_thresholds]
+
+    fig, axes = plt.subplots(1, 2, figsize=(9, 4))
+
+    id = np.argsort(pv)
+    pv = pv[id]
+    qv = qv[id]
+
+    # pv vs. qv
+    axes[0].plot(pv, qv)
+    axes[0].set_title('p-values vs. q-values')
+    axes[0].set_xlabel('p-values')
+    axes[0].set_ylabel('q-values')
+
+    # qv vs. significant tests
+    axes[1].plot(qv_thresholds, num_significant_tests)
+    axes[1].set_title('q-value vs. # Significant Tests')
+    axes[1].set_xlabel('q-value')
+    axes[1].set_ylabel('Number of Significant Tests')
+
+    plt.tight_layout()
+    plt.show()
diff --git a/setup.py b/setup.py
@@ -1,15 +1,16 @@
 import distutils.core
 from distutils.core import setup
 
-version_number = '0.1'
+version_number = '0.2'
 setup(name = 'qvalue',
       version = version_number,
       description = 'Converts p-values in q-values in order to account for multiple hypotheses testing, see (Storey and Tibshirani, 2003)',
       long_description = open('README.md').read(),
-      author = 'Nicolo Fusi',
-      author_email = 'nicolo.fusi@sheffield.ac.uk',
+      author = 'Nicolo Fusi, Neo Christopher Chung',
+      author_email = 'nicolo.fusi@sheffield.ac.uk, nchchung@gmail.com',
       packages = ['qvalue'],
-      requires = ['numpy (>=1.5)',
-                  'scipy (>=0.8)'],
+      requires = ['numpy (>=1.24.1)',
+                  'scipy (>=1.13.1)',
+                  'matplotlib (>=3.9.2)'],
       license = '3-clause BSD',
       )