From 2f80c232c1df270fa7ef15357025cd2c71b4c4c0 Mon Sep 17 00:00:00 2001 From: Neo Christopher Chung Date: Mon, 27 Jan 2025 16:57:18 +0100 Subject: [PATCH 1/3] fixed functions for newer numpy/scipy versions --- NEWS.md | 7 +++ README.md | 41 +++++++++++----- qvalue/qvalue.py | 125 +++++++++++++++++++++++++++++++++++------------ setup.py | 11 +++-- 4 files changed, 135 insertions(+), 49 deletions(-) create mode 100644 NEWS.md diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..afd45f5 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,7 @@ +# qvalue-python 0.2 (2024-01-27) + +* Forked https://github.com/nfusi/qvalue +* Fixed errors due to deprecated scipy functions +* Separate the pi0 estimation function +* Add a quick plotting function +* Added this `NEWS.md` file to track changes to the package. \ No newline at end of file diff --git a/README.md b/README.md index 89b1824..2c7947a 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,41 @@ -qvalue -====== +# Converts p-values to q-values -Converts p-values in q-values, see (Storey and Tibshirani, 2003) +This Python package implements qvalue and pi0 estimation from Storey and Tibshirani (2003). -The only function that needs to be called is 'estimate()'. +The only function that needs to be called is 'qvalue()'. It will accept a numpy array of pvalues and will return a numpy array of qvalues. For instance, given some uniform p-values: - import numpy as np - import qvalue +```Python +import numpy as np +from qvalue import qvalue, pi0est, plot_qvalue - pv = np.random.uniform(0.0, 1.0, size = (1000,)) +pv = np.random.uniform(0.0, 1.0, size = (1000,)) +``` it's possible to convert them in q-values by calling - qv = qvalue.estimate(pv) +```Python +qv, pi0 = qvalue(pv) +``` -estimate() also includes a memory-efficient (but not CPU-efficient) procedure for the case +qvalue() also includes a memory-efficient (but not CPU-efficient) procedure for the case when it's not possible to store both the p-values and the q-values in memory at the same time. -In addition it's possible to change the number of hypotheses tested or some parameters of the -procedure (like pi0). +In addition it's possible to change the number of hypotheses tested (m) or the proportion of +null hypotheses (pi0). -We refer to the documentation (in ipython, 'qvalue.estimate?') for more informations. +pi0() estimates the proportion of null hypotheses: + +```Python +pi0 = pi0est(pv) +``` + +Visualize the q-value and p-values with: + +```Python +plot_qvalue(qv,pv) +``` + +## References + +Storey, J. D., & Tibshirani, R. (2003). Statistical significance for genomewide studies. Proceedings of the National Academy of Sciences, 100(16), 9440-9445. \ No newline at end of file diff --git a/qvalue/qvalue.py b/qvalue/qvalue.py index 8c50690..42df3b9 100644 --- a/qvalue/qvalue.py +++ b/qvalue/qvalue.py @@ -1,8 +1,9 @@ import scipy as sp from scipy import interpolate +import numpy as np +import matplotlib.pyplot as plt - -def estimate(pv, m=None, verbose=False, lowmem=False, pi0=None): +def qvalue(pv, pi0=None, m=None, lowmem=False, verbose=False, plot=False): """ Estimates q-values from p-values @@ -34,58 +35,118 @@ def estimate(pv, m=None, verbose=False, lowmem=False, pi0=None): elif pi0 is not None: pi0 = pi0 else: - # evaluate pi0 for different lambdas - pi0 = [] - lam = sp.arange(0, 0.90, 0.01) - counts = sp.array([(pv > i).sum() for i in sp.arange(0, 0.9, 0.01)]) - for l in range(len(lam)): - pi0.append(counts[l]/(m*(1-lam[l]))) - - pi0 = sp.array(pi0) - - # fit natural cubic spline - tck = interpolate.splrep(lam, pi0, k=3) - pi0 = interpolate.splev(lam[-1], tck) - if verbose: - print("qvalues pi0=%.3f, estimated proportion of null features " % pi0) - - if pi0 > 1: - if verbose: - print("got pi0 > 1 (%.3f) while estimating qvalues, setting it to 1" % pi0) - pi0 = 1.0 - - assert(pi0 >= 0 and pi0 <= 1), "pi0 is not between 0 and 1: %f" % pi0 + pi0 = pi0est(pv=pv, m=m, verbose=verbose) if lowmem: # low memory version, only uses 1 pv and 1 qv matrices - qv = sp.zeros((len(pv),)) + qv = np.zeros((len(pv),)) last_pv = pv.argmax() qv[last_pv] = (pi0*pv[last_pv]*m)/float(m) - pv[last_pv] = -sp.inf + pv[last_pv] = -np.inf prev_qv = last_pv - for i in xrange(int(len(pv))-2, -1, -1): + for i in range(int(len(pv))-2, -1, -1): cur_max = pv.argmax() qv_i = (pi0*m*pv[cur_max]/float(i+1)) - pv[cur_max] = -sp.inf + pv[cur_max] = -np.inf qv_i1 = prev_qv qv[cur_max] = min(qv_i, qv_i1) prev_qv = qv[cur_max] - else: - p_ordered = sp.argsort(pv) + p_ordered = np.argsort(pv) pv = pv[p_ordered] qv = pi0 * m/len(pv) * pv qv[-1] = min(qv[-1], 1.0) - for i in xrange(len(pv)-2, -1, -1): + for i in range(len(pv)-2, -1, -1): qv[i] = min(pi0*m*pv[i]/(i+1.0), qv[i+1]) # reorder qvalues qv_temp = qv.copy() - qv = sp.zeros_like(qv) + qv = np.zeros_like(qv) qv[p_ordered] = qv_temp + if plot: + plot_qvalue(qv, pv) + # reshape qvalues qv = qv.reshape(original_shape) - return qv + return qv, pi0 + +def pi0est(pv, m=None, verbose=False): + """ + Estimates pi0 from p-values + + Args + ===== + + m: number of tests. If not specified m = pv.size + verbose: print verbose messages? (default False) + """ + assert(pv.min() >= 0 and pv.max() <= 1), "p-values should be between 0 and 1" + + original_shape = pv.shape + pv = pv.ravel() # flattens the array in place, more efficient than flatten() + + if m is None: + m = float(len(pv)) + else: + # the user has supplied an m + m *= 1.0 + + # evaluate pi0 for different lambdas + pi0 = [] + lam = np.arange(0, 0.90, 0.01) + counts = np.array([(pv > i).sum() for i in np.arange(0, 0.9, 0.01)]) + for l in range(len(lam)): + pi0.append(counts[l]/(m*(1-lam[l]))) + + pi0 = np.array(pi0) + + # fit natural cubic spline + tck = interpolate.splrep(lam, pi0, k=3) + pi0 = interpolate.splev(lam[-1], tck) + if verbose: + print("qvalues pi0=%.3f, estimated proportion of null features " % pi0) + + if pi0 > 1: + if verbose: + print("got pi0 > 1 (%.3f) while estimating qvalues, setting it to 1" % pi0) + pi0 = 1.0 + + assert(pi0 >= 0 and pi0 <= 1), "pi0 is not between 0 and 1: %f" % pi0 + + return pi0 + +def plot_qvalue(qv, pv): + """ + Visualize the q-value results + + Args + ===== + qv: q-values + pv: p-values + """ + qv_thresholds = np.linspace(0, 1, 100) + num_significant_tests = [np.sum(qv <= threshold) for threshold in qv_thresholds] + + fig, axes = plt.subplots(1, 2, figsize=(9, 4)) + + id = np.argsort(pv) + pv = pv[id] + qv = qv[id] + + # pv vs. qv + axes[0].plot(pv, qv) + axes[0].set_title('p-values vs. q-values') + axes[0].set_xlabel('p-values') + axes[0].set_ylabel('q-values') + + # qv vs. significant tests + axes[1].plot(qv_thresholds, num_significant_tests) + axes[1].set_title('q-value vs. # Significant Tests') + axes[1].set_xlabel('q-value') + axes[1].set_ylabel('Number of Significant Tests') + + plt.tight_layout() + plt.show() \ No newline at end of file diff --git a/setup.py b/setup.py index 4210e01..960f8a7 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,16 @@ import distutils.core from distutils.core import setup -version_number = '0.1' +version_number = '0.2' setup(name = 'qvalue', version = version_number, description = 'Converts p-values in q-values in order to account for multiple hypotheses testing, see (Storey and Tibshirani, 2003)', long_description = open('README.md').read(), - author = 'Nicolo Fusi', - author_email = 'nicolo.fusi@sheffield.ac.uk', + author = 'Nicolo Fusi, Neo Christopher Chung', + author_email = 'nicolo.fusi@sheffield.ac.uk, nchchung@gmail.com', packages = ['qvalue'], - requires = ['numpy (>=1.5)', - 'scipy (>=0.8)'], + requires = ['numpy (>=1.24.1)', + 'scipy (>=1.13.1)', + 'matplotlib (>=3.9.2)'], license = '3-clause BSD', ) From 0b0592c58eb2dee6ef82a49b897b4931d0f1df3f Mon Sep 17 00:00:00 2001 From: Neo Christopher Chung Date: Mon, 27 Jan 2025 16:20:59 +0000 Subject: [PATCH 2/3] add install command --- .vscode/settings.json | 5 +++++ README.md | 7 +++++++ qvalue/__init__.py | 1 - 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..6c2ff60 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "githubPullRequests.ignoredPullRequestBranches": [ + "master" + ] +} \ No newline at end of file diff --git a/README.md b/README.md index 2c7947a..4795233 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,13 @@ Visualize the q-value and p-values with: plot_qvalue(qv,pv) ``` +## Installation + +Install this repo: +``` +pip install --upgrade https://github.com/ncchung/qvalue-python/tarball/master +``` + ## References Storey, J. D., & Tibshirani, R. (2003). Statistical significance for genomewide studies. Proceedings of the National Academy of Sciences, 100(16), 9440-9445. \ No newline at end of file diff --git a/qvalue/__init__.py b/qvalue/__init__.py index 63f3a72..e69de29 100644 --- a/qvalue/__init__.py +++ b/qvalue/__init__.py @@ -1 +0,0 @@ -from qvalue import * From 19c7dbb6ee6375e9a729012f3c69a0041cbb57ef Mon Sep 17 00:00:00 2001 From: Neo Christopher Chung Date: Mon, 27 Jan 2025 17:23:28 +0100 Subject: [PATCH 3/3] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4795233..cfb5684 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ For instance, given some uniform p-values: ```Python import numpy as np -from qvalue import qvalue, pi0est, plot_qvalue +from qvalue.qvalue import qvalue, pi0est, plot_qvalue pv = np.random.uniform(0.0, 1.0, size = (1000,)) ``` @@ -45,4 +45,4 @@ pip install --upgrade https://github.com/ncchung/qvalue-python/tarball/master ## References -Storey, J. D., & Tibshirani, R. (2003). Statistical significance for genomewide studies. Proceedings of the National Academy of Sciences, 100(16), 9440-9445. \ No newline at end of file +Storey, J. D., & Tibshirani, R. (2003). Statistical significance for genomewide studies. Proceedings of the National Academy of Sciences, 100(16), 9440-9445.