M6A

Review ASE results

rpy2 assistant

In [1]:
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
pandas2ri.activate()
import os
import pandas as pd, numpy as np
from collections import OrderedDict
from rpy2.robjects.vectors import DataFrame, FloatVector, IntVector, StrVector, ListVector
# Directory holding the per-sample QuASAR ASE files that the cells below read and write.
path = os.path.expanduser("~/Documents/m6A/Data/ASE/samples")
# Handle to R's readRDS function, callable from Python through rpy2.
readRDS = robjects.r['readRDS']
# P-value threshold used when filtering the ASE tables in later cells.
p_cutoff = 0.05

Import the allele-specific expression (ASE) inference results for an individual, together with the QuASAR input.

beta: estimates of allelic imbalance
se: standard errors
pval: p-values from a likelihood ratio test (LRT) for ASE
rsID: SNP ID
freq: SNP allele frequency
#ref: Number of reads mapping to the reference allele
#alt: Number of reads mapping to the alternate allele
#either: Number of reads not mapping to either allele
In [2]:
def RDS2df(RBP):
    """Build the per-SNP ASE table for one sample and save it as a TSV.

    Reads ``{path}/{RBP}.wasp_remapped.qual10.dedup.quasar.ase.rds`` through
    R's ``readRDS``, takes element ``[0][0]`` of the converted object as the
    ASE result table, and joins it with the matching QuASAR input file
    (``.quasar.in.gz``) on ``chr``, ``pos`` and ``rsID``.

    Parameters
    ----------
    RBP : str
        Sample name used as the file-name prefix (e.g. ``"FTO"``).

    Returns
    -------
    pandas.DataFrame
        The ASE table left-joined with the input columns ``ref``, ``alt``,
        ``freq``, ``#ref``, ``#alt`` and ``#either``.

    Side effects
    ------------
    Writes the merged table to ``{path}/{RBP}.quasar.snp_info.txt``
    (tab-separated, no index) on every call.
    """
    # Short names for the columns coming out of the RDS object.
    column_aliases = {
        "annotations.rsID": "rsID",
        "annotations.chr": "chr",
        "annotations.pos0": "pos",
        "betas": "beta",
        "betas.se": "se",
        "pval2.het.ind.": "pval",
    }
    # Names assigned to the selected input-file columns (file column 2 is skipped).
    input_columns = ["chr", "pos", "ref", "alt", "rsID", "freq", "#ref", "#alt", "#either"]

    ase_rds = pandas2ri.ri2py(
        readRDS(f'{path}/{RBP}.wasp_remapped.qual10.dedup.quasar.ase.rds')
    )
    ase_table = pandas2ri.ri2py(ase_rds[0][0]).rename(columns=column_aliases)

    quasar_input = pd.read_table(
        f"{path}/{RBP}.wasp_remapped.qual10.dedup.quasar.in.gz",
        compression="gzip",
        sep="\t",
        header=None,
        usecols=[0, 1, 3, 4, 5, 6, 7, 8, 9],
        names=input_columns,
    )

    # Left join keeps every SNP from the ASE results even without a matching input row.
    snp_info = ase_table.merge(quasar_input, on=["chr", "pos", "rsID"], how="left")
    snp_info.to_csv(f'{path}/{RBP}.quasar.snp_info.txt', sep="\t", index=False)
    return snp_info
In [3]:
# Build the merged ASE table for each sample. Each RDS2df call also writes
# {path}/<sample>.quasar.snp_info.txt as a side effect.
FTO = RDS2df("FTO")
HNRNPC = RDS2df("HNRNPC")
IGF2BP2 = RDS2df("IGF2BP2")
In [4]:
# NOTE(review): FTO is filtered at 3 * p_cutoff (0.15), a looser threshold than the
# plain p_cutoff used for HNRNPC and IGF2BP2 — confirm this relaxation is intended.
FTO.loc[FTO["pval"] < p_cutoff*3]
Out[4]:
rsID chr pos beta se pval ref alt freq #ref #alt #either
11 rs1695 chr11 67352688 1.351097 0.783640 0.084684 A G 0.352636 14 3 0
14 rs4902 chr12 57108202 1.187061 0.708578 0.093881 A G 0.607628 23 6 0
23 rs72984031 chr19 872088 -1.223893 0.795985 0.124151 A G 0.176717 3 12 0
24 rs891206 chr19 872254 -1.006632 0.674080 0.135347 G A 0.441693 10 31 0
26 rs12981166 chr19 2270385 -1.263369 0.723053 0.080590 C T 0.293930 5 21 0
30 rs2523175 chr19 2271322 -0.921468 0.637951 0.148621 A G 0.898562 21 59 0
36 rs11552518 chr2 26455126 -1.408342 0.850564 0.097768 G A 0.142372 2 10 0
42 rs2230653 chr6 26056603 -1.420418 0.661120 0.031674 G A 0.174920 10 51 0
In [5]:
# Display the complete, unfiltered FTO table.
FTO
Out[5]:
rsID chr pos beta se pval ref alt freq #ref #alt #either
0 rs8014 chr1 990983 0.505350 0.816775 0.536105 G A 0.922324 7 4 0
1 rs6684539 chr1 11968272 -0.245987 0.621327 0.692174 A G 0.616813 48 63 0
2 rs10874744 chr1 93306316 0.929733 0.722990 0.198459 G A 0.691893 17 6 0
3 rs3805 chr1 205683354 -0.708717 0.761321 0.351903 T G 0.233626 5 11 0
4 rs2796271 chr1 207941926 -0.505350 0.816775 0.536105 A G 0.671126 4 7 0
5 rs3758410 chr10 17271399 -0.106749 0.741191 0.885482 G C 0.414936 8 9 0
6 rs5009539 chr11 5269582 0.310123 0.647507 0.631975 G A 0.477436 31 22 0
7 rs1065686 chr11 5269583 0.310123 0.647507 0.631975 T G 0.718450 31 22 0
8 rs61893083 chr11 5269584 0.310123 0.647507 0.631975 G A 0.718450 31 22 0
9 rs62755960 chr11 5269585 0.310123 0.647507 0.631975 A G 0.718450 31 22 0
10 rs2073686 chr11 8707082 -0.551127 0.628788 0.380763 G A 0.183107 32 59 0
11 rs1695 chr11 67352688 1.351097 0.783640 0.084684 A G 0.352636 14 3 0
12 rs4891 chr11 67353969 0.546706 0.747985 0.464837 T C 0.356030 11 6 0
13 rs1944108 chr11 93464174 -0.461441 0.754252 0.540679 A C 0.370407 6 10 0
14 rs4902 chr12 57108202 1.187061 0.708578 0.093881 A G 0.607628 23 6 0
15 rs55671249 chr16 591726 0.505350 0.816775 0.536105 C A 0.054712 7 4 0
16 rs2279258 chr16 88724346 -0.165265 0.809792 0.838289 G T 0.498403 5 6 0
17 rs4782500 chr16 89008541 -0.165265 0.809792 0.838289 C T 0.229633 5 6 0
18 rs13329773 chr16 89023571 0.505350 0.816775 0.536105 G T 0.166134 7 4 0
19 rs17848934 chr17 80054374 0.000000 0.793392 1.000000 C A 0.177516 6 6 0
20 rs62078748 chr17 80055324 -0.624454 0.804751 0.437773 C T 0.165136 4 8 0
21 rs906807 chr18 9117866 -1.070189 0.811626 0.187311 T C 0.778954 3 10 0
22 rs1683593 chr19 867734 0.505350 0.816775 0.536105 A G 0.244209 7 4 0
23 rs72984031 chr19 872088 -1.223893 0.795985 0.124151 A G 0.176717 3 12 0
24 rs891206 chr19 872254 -1.006632 0.674080 0.135347 G A 0.441693 10 31 0
25 rs12979409 chr19 2270369 -0.728732 0.794736 0.359170 G T 0.274561 4 9 0
26 rs12981166 chr19 2270385 -1.263369 0.723053 0.080590 C T 0.293930 5 21 0
27 rs12461851 chr19 2270909 -0.750163 0.664954 0.259260 G A 0.274760 13 30 0
28 rs2523174 chr19 2271180 -0.383928 0.659392 0.560402 T C 0.897564 17 26 0
29 rs2074457 chr19 2271281 -0.932944 0.659830 0.157387 T C 0.787939 13 37 0
30 rs2523175 chr19 2271322 -0.921468 0.637951 0.148621 A G 0.898562 21 59 0
31 rs10427038 chr19 28991811 0.877927 0.832469 0.291606 T C 0.369808 8 3 0
32 rs11084765 chr19 34883444 0.304640 0.796119 0.701974 G A 0.610823 7 5 0
33 rs17627 chr19 39923951 -0.260534 0.770169 0.735151 G A 0.240815 6 8 0
34 rs2230267 chr19 49469086 0.694652 0.671977 0.301256 T C 0.460663 26 12 0
35 rs11558795 chr19 49507577 0.877927 0.832469 0.291606 T C 0.098443 8 3 0
36 rs11552518 chr2 26455126 -1.408342 0.850564 0.097768 G A 0.142372 2 10 0
37 rs1139829 chr2 99225167 -0.095484 0.727266 0.895544 C T 0.247604 9 10 0
38 rs12374324 chr4 714214 -0.877927 0.832469 0.291606 G A 0.198283 3 8 0
39 rs4563584 chr5 179049328 -0.366771 0.761545 0.630080 G A 0.401158 6 9 0
40 rs66479467 chr5 179049671 -1.048146 0.766729 0.171614 A C 0.400559 4 13 0
41 rs8384 chr6 26056071 0.877162 0.726145 0.227059 G C 0.116214 16 6 0
42 rs2230653 chr6 26056603 -1.420418 0.661120 0.031674 G A 0.174920 10 51 0
43 rs2393593 chr6 26285682 -0.332649 0.712908 0.640780 T C 0.790535 9 13 0
44 rs1049346 chr6 38670836 0.624300 0.768037 0.416303 G A 0.564696 10 5 0
45 rs2010963 chr6 43738349 0.662938 0.640281 0.300488 C G 0.673922 46 22 0
46 rs3024998 chr6 43745576 -0.877927 0.832469 0.291606 C T 0.331470 3 8 0
47 rs10862 chr6 166755974 -0.366771 0.761545 0.630080 T C 0.824281 6 9 0
48 rs11168 chr6 166755978 -0.424885 0.785039 0.588351 A C 0.824281 5 8 0
49 rs1044059 chr6 167369896 0.000000 0.710646 1.000000 C A 0.673722 11 11 0
50 rs12717 chr6 170862299 -0.366771 0.761545 0.630080 G C 0.450080 6 9 0
51 rs6720 chr7 75677503 0.106749 0.741191 0.885482 C T 0.522165 9 8 0
52 rs12338 chr8 11710887 0.979535 0.772433 0.204756 G C 0.395966 12 4 0
53 rs2449508 chr8 98788260 0.505350 0.816775 0.536105 G T 0.083666 7 4 0
54 rs4740427 chr9 134151862 0.121029 0.758486 0.873223 G T 0.353834 8 7 0
In [6]:
# SNPs in the HNRNPC sample whose ASE p-value is below p_cutoff.
HNRNPC.loc[HNRNPC["pval"] < p_cutoff]
Out[6]:
rsID chr pos beta se pval ref alt freq #ref #alt #either
22 rs2840759 chr1 28834153 -1.031958 0.519901 0.047154 G T 0.207867 29 88 0
194 rs6498107 chr16 8909289 -1.417305 0.715121 0.047489 A G 0.885982 3 14 0
197 rs7199167 chr16 8919414 -1.597556 0.653389 0.014484 T A 0.392572 4 23 0
209 rs432477 chr16 23600572 -1.560469 0.782937 0.046251 T C 0.326278 2 11 0
210 rs703773 chr16 23653268 -1.532484 0.608424 0.011776 G A 0.078474 6 32 0
218 rs62030049 chr16 50572708 -1.436739 0.575462 0.012536 A G 0.150759 9 43 0
219 rs35419407 chr16 50575018 -1.668422 0.648371 0.010075 C A 0.119409 4 25 0
270 rs28644774 chr16 89472180 1.560469 0.782937 0.046251 G C 0.518171 11 2 0
289 rs11078307 chr17 15411837 1.560469 0.782937 0.046251 G A 0.675719 11 2 0
324 rs2727320 chr17 61865324 1.560469 0.782937 0.046251 T C 0.732628 11 2 0
330 rs1177972 chr17 70563472 -1.585496 0.698833 0.023282 G A 0.389776 3 17 0
346 rs4239021 chr17 80531825 -1.560469 0.782937 0.046251 T C 0.460663 2 11 0
405 rs11692629 chr2 42498113 -1.634283 0.694294 0.018579 C T 0.232428 3 18 0
409 rs1439225 chr2 42512930 -1.242851 0.623718 0.046300 G C 0.273163 6 23 0
445 rs6720885 chr2 99971288 -1.560469 0.782937 0.046251 A G 0.407748 2 11 0
448 rs6542994 chr2 101451520 -1.417305 0.715121 0.047489 A G 0.436102 3 14 0
498 rs11651 chr2 216229691 1.597556 0.653389 0.014484 A G 0.282748 23 4 0
510 rs1250259 chr2 216300481 -1.432458 0.665643 0.031398 T A 0.795128 4 19 0
527 rs13034294 chr2 236675429 1.477490 0.709174 0.037215 G A 0.453474 15 3 0
540 rs6055809 chr20 8407790 1.533378 0.703771 0.029346 C T 0.102037 16 3 0
607 rs9838739 chr3 46137188 -1.476186 0.566418 0.009156 T C 0.247404 10 50 1
615 rs73093101 chr3 61554356 -1.560469 0.782937 0.046251 A T 0.370008 2 11 0
692 rs6896817 chr5 179267394 1.560469 0.782937 0.046251 G A 0.490216 11 2 0
707 rs9368904 chr6 36069349 1.417305 0.715121 0.047489 C A 0.185503 14 3 0
792 rs34931322 chr8 131534469 -1.326326 0.548616 0.015624 A G 0.606629 14 59 1
In [8]:
# SNPs in the IGF2BP2 sample whose ASE p-value is below p_cutoff.
IGF2BP2.loc[IGF2BP2["pval"] < p_cutoff]
Out[8]:
rsID chr pos beta se pval ref alt freq #ref #alt #either
166 rs59267791 chr19 2248151 1.385637 0.674851 0.040048 T C 0.104433 60 12 1
213 rs11677797 chr2 97559758 1.601917 0.721375 0.026375 G A 0.428514 33 5 0
233 rs7445 chr22 21977046 -1.483447 0.672319 0.027352 C T 0.313898 12 68 0
234 rs1063311 chr22 22116466 1.704536 0.673744 0.011408 C T 0.332668 76 10 0
259 rs2010963 chr6 43738349 1.611538 0.774645 0.037493 C G 0.673922 20 3 0
281 rs1861000 chr7 120725353 -1.529486 0.725559 0.035030 T C 0.323283 5 30 0
313 rs2296710 chr9 134021629 1.435647 0.654628 0.028302 A G 0.346446 112 21 0

Posterior probability of the genotypes, gt, across all samples:

g0=homozygous reference
g1=heterozygous
g2=homozygous alternate
In [11]:
# Genotype posterior probabilities (g0, g1, g2) per SNP for the FTO sample.
# The file has no header row; file column 2 is not loaded.
FTO_geno = pd.read_csv(
    f"{path}/FTO.wasp_remapped.qual10.dedup.quasar.genotype.tsv",
    sep="\t",
    header=None,
    usecols=[0, 1, 3, 4, 5, 6],
    names=["chr", "pos", "rsID", "g0", "g1", "g2"],
)
FTO_geno.head()
Out[11]:
chr pos rsID g0 g1 g2
0 chr1 990532 rs4308920 9.955845e-01 0.004415 5.617994e-17
1 chr1 990805 rs2799073 1.623563e-04 0.999838 2.644136e-10
2 chr1 990942 rs28632009 9.987619e-01 0.001238 6.685915e-20
3 chr1 990983 rs8014 1.714672e-08 1.000000 1.283906e-13
4 chr1 1152630 rs11721 9.940118e-01 0.005988 1.032281e-16

© 2018 Min Qiao at He Lab, University of Chicago

Exported from analysis/20180504_ASE_res.ipynb committed by Min Qiao on Wed May 23 17:25:54 2018 revision 8, 8aa7a12