Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"githubPullRequests.ignoredPullRequestBranches": [
"master"
]
}
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# qvalue-python 0.2 (2024-01-27)

* Forked https://github.com/nfusi/qvalue
* Fixed errors due to deprecated scipy functions
* Separate the pi0 estimation function
* Add a quick plotting function
* Added this `NEWS.md` file to track changes to the package.
48 changes: 36 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,24 +1,48 @@
qvalue
======
# Converts p-values to q-values

Converts p-values in q-values, see (Storey and Tibshirani, 2003)
This Python package implements qvalue and pi0 estimation from Storey and Tibshirani (2003).

The only function that needs to be called is 'estimate()'.
The only function that needs to be called is 'qvalue()'.
It will accept a numpy array of pvalues and will return a numpy array of qvalues.
For instance, given some uniform p-values:

import numpy as np
import qvalue
```Python
import numpy as np
from qvalue.qvalue import qvalue, pi0est, plot_qvalue

pv = np.random.uniform(0.0, 1.0, size = (1000,))
pv = np.random.uniform(0.0, 1.0, size = (1000,))
```

it's possible to convert them in q-values by calling

qv = qvalue.estimate(pv)
```Python
qv, pi0 = qvalue(pv)
```

estimate() also includes a memory-efficient (but not CPU-efficient) procedure for the case
qvalue() also includes a memory-efficient (but not CPU-efficient) procedure for the case
when it's not possible to store both the p-values and the q-values in memory at the same time.
In addition it's possible to change the number of hypotheses tested or some parameters of the
procedure (like pi0).
In addition it's possible to change the number of hypotheses tested (m) or the proportion of
null hypotheses (pi0).

We refer to the documentation (in ipython, 'qvalue.estimate?') for more informations.
pi0() estimates the proportion of null hypotheses:

```Python
pi0 = pi0est(pv)
```

Visualize the q-value and p-values with:

```Python
plot_qvalue(qv,pv)
```

## Installation

Install this repo:
```
pip install --upgrade https://github.com/ncchung/qvalue-python/tarball/master
```

## References

Storey, J. D., & Tibshirani, R. (2003). Statistical significance for genomewide studies. Proceedings of the National Academy of Sciences, 100(16), 9440-9445.
1 change: 0 additions & 1 deletion qvalue/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
from qvalue import *
125 changes: 93 additions & 32 deletions qvalue/qvalue.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import scipy as sp
from scipy import interpolate
import numpy as np
import matplotlib.pyplot as plt


def estimate(pv, m=None, verbose=False, lowmem=False, pi0=None):
def qvalue(pv, pi0=None, m=None, lowmem=False, verbose=False, plot=False):
"""
Estimates q-values from p-values

Expand Down Expand Up @@ -34,58 +35,118 @@ def estimate(pv, m=None, verbose=False, lowmem=False, pi0=None):
elif pi0 is not None:
pi0 = pi0
else:
# evaluate pi0 for different lambdas
pi0 = []
lam = sp.arange(0, 0.90, 0.01)
counts = sp.array([(pv > i).sum() for i in sp.arange(0, 0.9, 0.01)])
for l in range(len(lam)):
pi0.append(counts[l]/(m*(1-lam[l])))

pi0 = sp.array(pi0)

# fit natural cubic spline
tck = interpolate.splrep(lam, pi0, k=3)
pi0 = interpolate.splev(lam[-1], tck)
if verbose:
print("qvalues pi0=%.3f, estimated proportion of null features " % pi0)

if pi0 > 1:
if verbose:
print("got pi0 > 1 (%.3f) while estimating qvalues, setting it to 1" % pi0)
pi0 = 1.0

assert(pi0 >= 0 and pi0 <= 1), "pi0 is not between 0 and 1: %f" % pi0
pi0 = pi0est(pv=pv, m=m, verbose=verbose)

if lowmem:
# low memory version, only uses 1 pv and 1 qv matrices
qv = sp.zeros((len(pv),))
qv = np.zeros((len(pv),))
last_pv = pv.argmax()
qv[last_pv] = (pi0*pv[last_pv]*m)/float(m)
pv[last_pv] = -sp.inf
pv[last_pv] = -np.inf
prev_qv = last_pv
for i in xrange(int(len(pv))-2, -1, -1):
for i in range(int(len(pv))-2, -1, -1):
cur_max = pv.argmax()
qv_i = (pi0*m*pv[cur_max]/float(i+1))
pv[cur_max] = -sp.inf
pv[cur_max] = -np.inf
qv_i1 = prev_qv
qv[cur_max] = min(qv_i, qv_i1)
prev_qv = qv[cur_max]

else:
p_ordered = sp.argsort(pv)
p_ordered = np.argsort(pv)
pv = pv[p_ordered]
qv = pi0 * m/len(pv) * pv
qv[-1] = min(qv[-1], 1.0)

for i in xrange(len(pv)-2, -1, -1):
for i in range(len(pv)-2, -1, -1):
qv[i] = min(pi0*m*pv[i]/(i+1.0), qv[i+1])

# reorder qvalues
qv_temp = qv.copy()
qv = sp.zeros_like(qv)
qv = np.zeros_like(qv)
qv[p_ordered] = qv_temp

if plot:
plot_qvalue(qv, pv)

# reshape qvalues
qv = qv.reshape(original_shape)

return qv
return qv, pi0

def pi0est(pv, m=None, verbose=False):
"""
Estimates pi0 from p-values

Args
=====

m: number of tests. If not specified m = pv.size
verbose: print verbose messages? (default False)
"""
assert(pv.min() >= 0 and pv.max() <= 1), "p-values should be between 0 and 1"

original_shape = pv.shape
pv = pv.ravel() # flattens the array in place, more efficient than flatten()

if m is None:
m = float(len(pv))
else:
# the user has supplied an m
m *= 1.0

# evaluate pi0 for different lambdas
pi0 = []
lam = np.arange(0, 0.90, 0.01)
counts = np.array([(pv > i).sum() for i in np.arange(0, 0.9, 0.01)])
for l in range(len(lam)):
pi0.append(counts[l]/(m*(1-lam[l])))

pi0 = np.array(pi0)

# fit natural cubic spline
tck = interpolate.splrep(lam, pi0, k=3)
pi0 = interpolate.splev(lam[-1], tck)
if verbose:
print("qvalues pi0=%.3f, estimated proportion of null features " % pi0)

if pi0 > 1:
if verbose:
print("got pi0 > 1 (%.3f) while estimating qvalues, setting it to 1" % pi0)
pi0 = 1.0

assert(pi0 >= 0 and pi0 <= 1), "pi0 is not between 0 and 1: %f" % pi0

return pi0

def plot_qvalue(qv, pv):
"""
Visualize the q-value results

Args
=====
qv: q-values
pv: p-values
"""
qv_thresholds = np.linspace(0, 1, 100)
num_significant_tests = [np.sum(qv <= threshold) for threshold in qv_thresholds]

fig, axes = plt.subplots(1, 2, figsize=(9, 4))

id = np.argsort(pv)
pv = pv[id]
qv = qv[id]

# pv vs. qv
axes[0].plot(pv, qv)
axes[0].set_title('p-values vs. q-values')
axes[0].set_xlabel('p-values')
axes[0].set_ylabel('q-values')

# qv vs. significant tests
axes[1].plot(qv_thresholds, num_significant_tests)
axes[1].set_title('q-value vs. # Significant Tests')
axes[1].set_xlabel('q-value')
axes[1].set_ylabel('Number of Significant Tests')

plt.tight_layout()
plt.show()
11 changes: 6 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import distutils.core
from distutils.core import setup

version_number = '0.1'
version_number = '0.2'
setup(name = 'qvalue',
version = version_number,
description = 'Converts p-values in q-values in order to account for multiple hypotheses testing, see (Storey and Tibshirani, 2003)',
long_description = open('README.md').read(),
author = 'Nicolo Fusi',
author_email = 'nicolo.fusi@sheffield.ac.uk',
author = 'Nicolo Fusi, Neo Christopher Chung',
author_email = 'nicolo.fusi@sheffield.ac.uk, nchchung@gmail.com',
packages = ['qvalue'],
requires = ['numpy (>=1.5)',
'scipy (>=0.8)'],
requires = ['numpy (>=1.24.1)',
'scipy (>=1.13.1)',
'matplotlib (>=3.9.2)'],
license = '3-clause BSD',
)