import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
pandas2ri.activate()
import os
import pandas as pd, numpy as np
from collections import OrderedDict
from rpy2.robjects.vectors import DataFrame, FloatVector, IntVector, StrVector, ListVector
# Directory the per-sample QuASAR files are read from and written to
# ("~" is expanded to the current user's home directory).
path = os.path.expanduser("~/Documents/m6A/Data/ASE/samples")
# Handle to R's readRDS function, looked up through rpy2.
readRDS = robjects.r['readRDS']
# P-value threshold used when filtering the ASE tables below.
p_cutoff = 0.05
Import all ASE inference results for an individual, together with the QuASAR input.
beta: estimates of allelic imbalance
se: standard errors
pval: p-values from a likelihood-ratio test (LRT) for ASE
rsID: SNP ID
freq: SNP allele frequency
#ref: Number of reads mapping to the reference allele
#alt: Number of reads mapping to the alternate allele
#either: Number of reads not mapping to either allele
def RDS2df(RBP):
    """Assemble the SNP-level ASE table for one sample and write it to disk.

    Loads ``{path}/{RBP}.wasp_remapped.qual10.dedup.quasar.ase.rds`` through
    R's ``readRDS``, takes element ``[0][0]`` of the converted object as the
    result table, renames its columns to short names, and left-joins the
    QuASAR input file (``.quasar.in.gz``) on ``chr``, ``pos`` and ``rsID``.
    The merged table is saved as tab-separated text to
    ``{path}/{RBP}.quasar.snp_info.txt``.

    Parameters
    ----------
    RBP : str
        Sample name used as the file-name prefix (e.g. ``"FTO"``).

    Returns
    -------
    pandas.DataFrame
        The merged table that was written to disk.
    """
    rds_file = f'{path}/{RBP}.wasp_remapped.qual10.dedup.quasar.ase.rds'
    input_file = f"{path}/{RBP}.wasp_remapped.qual10.dedup.quasar.in.gz"
    output_file = f'{path}/{RBP}.quasar.snp_info.txt'

    # The result table sits at position [0][0] of the converted RDS object.
    converted = pandas2ri.ri2py(readRDS(rds_file))
    ase_results = pandas2ri.ri2py(converted[0][0])

    # Shorten the QuASAR column names so they line up with the input file's.
    short_names = {
        "annotations.rsID": "rsID",
        "annotations.chr": "chr",
        "annotations.pos0": "pos",
        "betas": "beta",
        "betas.se": "se",
        "pval2.het.ind.": "pval",
    }
    ase_results = ase_results.rename(columns=short_names)

    # Column 2 of the input file is skipped; the rest are labelled here.
    input_columns = ["chr", "pos", "ref", "alt", "rsID", "freq", "#ref", "#alt", "#either"]
    quasar_input = pd.read_table(
        input_file,
        compression="gzip",
        sep="\t",
        header=None,
        usecols=[0, 1, 3, 4, 5, 6, 7, 8, 9],
        names=input_columns,
    )

    # Left join: keep every SNP that has an ASE result.
    merged = pd.merge(ase_results, quasar_input, on=["chr", "pos", "rsID"], how="left")
    merged.to_csv(output_file, sep="\t", index=False)
    return merged
# Build (and write to disk) the merged ASE table for each of the three samples.
FTO, HNRNPC, IGF2BP2 = (RDS2df(sample) for sample in ("FTO", "HNRNPC", "IGF2BP2"))
# FTO SNPs whose p-value is below three times the cutoff.
FTO.loc[FTO["pval"] < p_cutoff * 3]
rsID | chr | pos | beta | se | pval | ref | alt | freq | #ref | #alt | #either | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
11 | rs1695 | chr11 | 67352688 | 1.351097 | 0.783640 | 0.084684 | A | G | 0.352636 | 14 | 3 | 0 |
14 | rs4902 | chr12 | 57108202 | 1.187061 | 0.708578 | 0.093881 | A | G | 0.607628 | 23 | 6 | 0 |
23 | rs72984031 | chr19 | 872088 | -1.223893 | 0.795985 | 0.124151 | A | G | 0.176717 | 3 | 12 | 0 |
24 | rs891206 | chr19 | 872254 | -1.006632 | 0.674080 | 0.135347 | G | A | 0.441693 | 10 | 31 | 0 |
26 | rs12981166 | chr19 | 2270385 | -1.263369 | 0.723053 | 0.080590 | C | T | 0.293930 | 5 | 21 | 0 |
30 | rs2523175 | chr19 | 2271322 | -0.921468 | 0.637951 | 0.148621 | A | G | 0.898562 | 21 | 59 | 0 |
36 | rs11552518 | chr2 | 26455126 | -1.408342 | 0.850564 | 0.097768 | G | A | 0.142372 | 2 | 10 | 0 |
42 | rs2230653 | chr6 | 26056603 | -1.420418 | 0.661120 | 0.031674 | G | A | 0.174920 | 10 | 51 | 0 |
# Bare expression: in a notebook cell this displays the full FTO table.
FTO
rsID | chr | pos | beta | se | pval | ref | alt | freq | #ref | #alt | #either | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | rs8014 | chr1 | 990983 | 0.505350 | 0.816775 | 0.536105 | G | A | 0.922324 | 7 | 4 | 0 |
1 | rs6684539 | chr1 | 11968272 | -0.245987 | 0.621327 | 0.692174 | A | G | 0.616813 | 48 | 63 | 0 |
2 | rs10874744 | chr1 | 93306316 | 0.929733 | 0.722990 | 0.198459 | G | A | 0.691893 | 17 | 6 | 0 |
3 | rs3805 | chr1 | 205683354 | -0.708717 | 0.761321 | 0.351903 | T | G | 0.233626 | 5 | 11 | 0 |
4 | rs2796271 | chr1 | 207941926 | -0.505350 | 0.816775 | 0.536105 | A | G | 0.671126 | 4 | 7 | 0 |
5 | rs3758410 | chr10 | 17271399 | -0.106749 | 0.741191 | 0.885482 | G | C | 0.414936 | 8 | 9 | 0 |
6 | rs5009539 | chr11 | 5269582 | 0.310123 | 0.647507 | 0.631975 | G | A | 0.477436 | 31 | 22 | 0 |
7 | rs1065686 | chr11 | 5269583 | 0.310123 | 0.647507 | 0.631975 | T | G | 0.718450 | 31 | 22 | 0 |
8 | rs61893083 | chr11 | 5269584 | 0.310123 | 0.647507 | 0.631975 | G | A | 0.718450 | 31 | 22 | 0 |
9 | rs62755960 | chr11 | 5269585 | 0.310123 | 0.647507 | 0.631975 | A | G | 0.718450 | 31 | 22 | 0 |
10 | rs2073686 | chr11 | 8707082 | -0.551127 | 0.628788 | 0.380763 | G | A | 0.183107 | 32 | 59 | 0 |
11 | rs1695 | chr11 | 67352688 | 1.351097 | 0.783640 | 0.084684 | A | G | 0.352636 | 14 | 3 | 0 |
12 | rs4891 | chr11 | 67353969 | 0.546706 | 0.747985 | 0.464837 | T | C | 0.356030 | 11 | 6 | 0 |
13 | rs1944108 | chr11 | 93464174 | -0.461441 | 0.754252 | 0.540679 | A | C | 0.370407 | 6 | 10 | 0 |
14 | rs4902 | chr12 | 57108202 | 1.187061 | 0.708578 | 0.093881 | A | G | 0.607628 | 23 | 6 | 0 |
15 | rs55671249 | chr16 | 591726 | 0.505350 | 0.816775 | 0.536105 | C | A | 0.054712 | 7 | 4 | 0 |
16 | rs2279258 | chr16 | 88724346 | -0.165265 | 0.809792 | 0.838289 | G | T | 0.498403 | 5 | 6 | 0 |
17 | rs4782500 | chr16 | 89008541 | -0.165265 | 0.809792 | 0.838289 | C | T | 0.229633 | 5 | 6 | 0 |
18 | rs13329773 | chr16 | 89023571 | 0.505350 | 0.816775 | 0.536105 | G | T | 0.166134 | 7 | 4 | 0 |
19 | rs17848934 | chr17 | 80054374 | 0.000000 | 0.793392 | 1.000000 | C | A | 0.177516 | 6 | 6 | 0 |
20 | rs62078748 | chr17 | 80055324 | -0.624454 | 0.804751 | 0.437773 | C | T | 0.165136 | 4 | 8 | 0 |
21 | rs906807 | chr18 | 9117866 | -1.070189 | 0.811626 | 0.187311 | T | C | 0.778954 | 3 | 10 | 0 |
22 | rs1683593 | chr19 | 867734 | 0.505350 | 0.816775 | 0.536105 | A | G | 0.244209 | 7 | 4 | 0 |
23 | rs72984031 | chr19 | 872088 | -1.223893 | 0.795985 | 0.124151 | A | G | 0.176717 | 3 | 12 | 0 |
24 | rs891206 | chr19 | 872254 | -1.006632 | 0.674080 | 0.135347 | G | A | 0.441693 | 10 | 31 | 0 |
25 | rs12979409 | chr19 | 2270369 | -0.728732 | 0.794736 | 0.359170 | G | T | 0.274561 | 4 | 9 | 0 |
26 | rs12981166 | chr19 | 2270385 | -1.263369 | 0.723053 | 0.080590 | C | T | 0.293930 | 5 | 21 | 0 |
27 | rs12461851 | chr19 | 2270909 | -0.750163 | 0.664954 | 0.259260 | G | A | 0.274760 | 13 | 30 | 0 |
28 | rs2523174 | chr19 | 2271180 | -0.383928 | 0.659392 | 0.560402 | T | C | 0.897564 | 17 | 26 | 0 |
29 | rs2074457 | chr19 | 2271281 | -0.932944 | 0.659830 | 0.157387 | T | C | 0.787939 | 13 | 37 | 0 |
30 | rs2523175 | chr19 | 2271322 | -0.921468 | 0.637951 | 0.148621 | A | G | 0.898562 | 21 | 59 | 0 |
31 | rs10427038 | chr19 | 28991811 | 0.877927 | 0.832469 | 0.291606 | T | C | 0.369808 | 8 | 3 | 0 |
32 | rs11084765 | chr19 | 34883444 | 0.304640 | 0.796119 | 0.701974 | G | A | 0.610823 | 7 | 5 | 0 |
33 | rs17627 | chr19 | 39923951 | -0.260534 | 0.770169 | 0.735151 | G | A | 0.240815 | 6 | 8 | 0 |
34 | rs2230267 | chr19 | 49469086 | 0.694652 | 0.671977 | 0.301256 | T | C | 0.460663 | 26 | 12 | 0 |
35 | rs11558795 | chr19 | 49507577 | 0.877927 | 0.832469 | 0.291606 | T | C | 0.098443 | 8 | 3 | 0 |
36 | rs11552518 | chr2 | 26455126 | -1.408342 | 0.850564 | 0.097768 | G | A | 0.142372 | 2 | 10 | 0 |
37 | rs1139829 | chr2 | 99225167 | -0.095484 | 0.727266 | 0.895544 | C | T | 0.247604 | 9 | 10 | 0 |
38 | rs12374324 | chr4 | 714214 | -0.877927 | 0.832469 | 0.291606 | G | A | 0.198283 | 3 | 8 | 0 |
39 | rs4563584 | chr5 | 179049328 | -0.366771 | 0.761545 | 0.630080 | G | A | 0.401158 | 6 | 9 | 0 |
40 | rs66479467 | chr5 | 179049671 | -1.048146 | 0.766729 | 0.171614 | A | C | 0.400559 | 4 | 13 | 0 |
41 | rs8384 | chr6 | 26056071 | 0.877162 | 0.726145 | 0.227059 | G | C | 0.116214 | 16 | 6 | 0 |
42 | rs2230653 | chr6 | 26056603 | -1.420418 | 0.661120 | 0.031674 | G | A | 0.174920 | 10 | 51 | 0 |
43 | rs2393593 | chr6 | 26285682 | -0.332649 | 0.712908 | 0.640780 | T | C | 0.790535 | 9 | 13 | 0 |
44 | rs1049346 | chr6 | 38670836 | 0.624300 | 0.768037 | 0.416303 | G | A | 0.564696 | 10 | 5 | 0 |
45 | rs2010963 | chr6 | 43738349 | 0.662938 | 0.640281 | 0.300488 | C | G | 0.673922 | 46 | 22 | 0 |
46 | rs3024998 | chr6 | 43745576 | -0.877927 | 0.832469 | 0.291606 | C | T | 0.331470 | 3 | 8 | 0 |
47 | rs10862 | chr6 | 166755974 | -0.366771 | 0.761545 | 0.630080 | T | C | 0.824281 | 6 | 9 | 0 |
48 | rs11168 | chr6 | 166755978 | -0.424885 | 0.785039 | 0.588351 | A | C | 0.824281 | 5 | 8 | 0 |
49 | rs1044059 | chr6 | 167369896 | 0.000000 | 0.710646 | 1.000000 | C | A | 0.673722 | 11 | 11 | 0 |
50 | rs12717 | chr6 | 170862299 | -0.366771 | 0.761545 | 0.630080 | G | C | 0.450080 | 6 | 9 | 0 |
51 | rs6720 | chr7 | 75677503 | 0.106749 | 0.741191 | 0.885482 | C | T | 0.522165 | 9 | 8 | 0 |
52 | rs12338 | chr8 | 11710887 | 0.979535 | 0.772433 | 0.204756 | G | C | 0.395966 | 12 | 4 | 0 |
53 | rs2449508 | chr8 | 98788260 | 0.505350 | 0.816775 | 0.536105 | G | T | 0.083666 | 7 | 4 | 0 |
54 | rs4740427 | chr9 | 134151862 | 0.121029 | 0.758486 | 0.873223 | G | T | 0.353834 | 8 | 7 | 0 |
# HNRNPC SNPs whose ASE p-value falls below the cutoff.
HNRNPC.loc[HNRNPC["pval"] < p_cutoff]
rsID | chr | pos | beta | se | pval | ref | alt | freq | #ref | #alt | #either | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
22 | rs2840759 | chr1 | 28834153 | -1.031958 | 0.519901 | 0.047154 | G | T | 0.207867 | 29 | 88 | 0 |
194 | rs6498107 | chr16 | 8909289 | -1.417305 | 0.715121 | 0.047489 | A | G | 0.885982 | 3 | 14 | 0 |
197 | rs7199167 | chr16 | 8919414 | -1.597556 | 0.653389 | 0.014484 | T | A | 0.392572 | 4 | 23 | 0 |
209 | rs432477 | chr16 | 23600572 | -1.560469 | 0.782937 | 0.046251 | T | C | 0.326278 | 2 | 11 | 0 |
210 | rs703773 | chr16 | 23653268 | -1.532484 | 0.608424 | 0.011776 | G | A | 0.078474 | 6 | 32 | 0 |
218 | rs62030049 | chr16 | 50572708 | -1.436739 | 0.575462 | 0.012536 | A | G | 0.150759 | 9 | 43 | 0 |
219 | rs35419407 | chr16 | 50575018 | -1.668422 | 0.648371 | 0.010075 | C | A | 0.119409 | 4 | 25 | 0 |
270 | rs28644774 | chr16 | 89472180 | 1.560469 | 0.782937 | 0.046251 | G | C | 0.518171 | 11 | 2 | 0 |
289 | rs11078307 | chr17 | 15411837 | 1.560469 | 0.782937 | 0.046251 | G | A | 0.675719 | 11 | 2 | 0 |
324 | rs2727320 | chr17 | 61865324 | 1.560469 | 0.782937 | 0.046251 | T | C | 0.732628 | 11 | 2 | 0 |
330 | rs1177972 | chr17 | 70563472 | -1.585496 | 0.698833 | 0.023282 | G | A | 0.389776 | 3 | 17 | 0 |
346 | rs4239021 | chr17 | 80531825 | -1.560469 | 0.782937 | 0.046251 | T | C | 0.460663 | 2 | 11 | 0 |
405 | rs11692629 | chr2 | 42498113 | -1.634283 | 0.694294 | 0.018579 | C | T | 0.232428 | 3 | 18 | 0 |
409 | rs1439225 | chr2 | 42512930 | -1.242851 | 0.623718 | 0.046300 | G | C | 0.273163 | 6 | 23 | 0 |
445 | rs6720885 | chr2 | 99971288 | -1.560469 | 0.782937 | 0.046251 | A | G | 0.407748 | 2 | 11 | 0 |
448 | rs6542994 | chr2 | 101451520 | -1.417305 | 0.715121 | 0.047489 | A | G | 0.436102 | 3 | 14 | 0 |
498 | rs11651 | chr2 | 216229691 | 1.597556 | 0.653389 | 0.014484 | A | G | 0.282748 | 23 | 4 | 0 |
510 | rs1250259 | chr2 | 216300481 | -1.432458 | 0.665643 | 0.031398 | T | A | 0.795128 | 4 | 19 | 0 |
527 | rs13034294 | chr2 | 236675429 | 1.477490 | 0.709174 | 0.037215 | G | A | 0.453474 | 15 | 3 | 0 |
540 | rs6055809 | chr20 | 8407790 | 1.533378 | 0.703771 | 0.029346 | C | T | 0.102037 | 16 | 3 | 0 |
607 | rs9838739 | chr3 | 46137188 | -1.476186 | 0.566418 | 0.009156 | T | C | 0.247404 | 10 | 50 | 1 |
615 | rs73093101 | chr3 | 61554356 | -1.560469 | 0.782937 | 0.046251 | A | T | 0.370008 | 2 | 11 | 0 |
692 | rs6896817 | chr5 | 179267394 | 1.560469 | 0.782937 | 0.046251 | G | A | 0.490216 | 11 | 2 | 0 |
707 | rs9368904 | chr6 | 36069349 | 1.417305 | 0.715121 | 0.047489 | C | A | 0.185503 | 14 | 3 | 0 |
792 | rs34931322 | chr8 | 131534469 | -1.326326 | 0.548616 | 0.015624 | A | G | 0.606629 | 14 | 59 | 1 |
# IGF2BP2 SNPs whose ASE p-value falls below the cutoff.
IGF2BP2.loc[IGF2BP2["pval"] < p_cutoff]
rsID | chr | pos | beta | se | pval | ref | alt | freq | #ref | #alt | #either | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
166 | rs59267791 | chr19 | 2248151 | 1.385637 | 0.674851 | 0.040048 | T | C | 0.104433 | 60 | 12 | 1 |
213 | rs11677797 | chr2 | 97559758 | 1.601917 | 0.721375 | 0.026375 | G | A | 0.428514 | 33 | 5 | 0 |
233 | rs7445 | chr22 | 21977046 | -1.483447 | 0.672319 | 0.027352 | C | T | 0.313898 | 12 | 68 | 0 |
234 | rs1063311 | chr22 | 22116466 | 1.704536 | 0.673744 | 0.011408 | C | T | 0.332668 | 76 | 10 | 0 |
259 | rs2010963 | chr6 | 43738349 | 1.611538 | 0.774645 | 0.037493 | C | G | 0.673922 | 20 | 3 | 0 |
281 | rs1861000 | chr7 | 120725353 | -1.529486 | 0.725559 | 0.035030 | T | C | 0.323283 | 5 | 30 | 0 |
313 | rs2296710 | chr9 | 134021629 | 1.435647 | 0.654628 | 0.028302 | A | G | 0.346446 | 112 | 21 | 0 |
Posterior probabilities of the genotypes (gt) across all samples:
g0=homozygous reference
g1=heterozygous
g2=homozygous alternate
# Genotype table for the FTO sample: file columns 0, 1, 3, 4, 5 and 6 are kept
# and labelled chr, pos, rsID, g0, g1, g2 (the file has no header row).
FTO_geno = pd.read_csv(
    f"{path}/FTO.wasp_remapped.qual10.dedup.quasar.genotype.tsv",
    sep="\t",
    header=None,
    names=["chr", "pos", "rsID", "g0", "g1", "g2"],
    usecols=[0, 1, 3, 4, 5, 6],
)
# Preview the first five rows.
FTO_geno.head(n=5)
chr | pos | rsID | g0 | g1 | g2 | |
---|---|---|---|---|---|---|
0 | chr1 | 990532 | rs4308920 | 9.955845e-01 | 0.004415 | 5.617994e-17 |
1 | chr1 | 990805 | rs2799073 | 1.623563e-04 | 0.999838 | 2.644136e-10 |
2 | chr1 | 990942 | rs28632009 | 9.987619e-01 | 0.001238 | 6.685915e-20 |
3 | chr1 | 990983 | rs8014 | 1.714672e-08 | 1.000000 | 1.283906e-13 |
4 | chr1 | 1152630 | rs11721 | 9.940118e-01 | 0.005988 | 1.032281e-16 |
Exported from analysis/20180504_ASE_res.ipynb
committed by Min Qiao on Wed May 23 17:25:54 2018 revision 8, 8aa7a12