import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from matplotlib_venn import venn3_unweighted as venn3

Extracting data¶

from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
root_dir = "/content/gdrive/My Drive/idp-ego/GUI/algorithms/trio/"
data_dir = root_dir + 'data/'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive

# path = Path(base_dir + 'data/bears')
# dest.mkdir(parents=True, exist_ok=True) I want only existing folders

e1=pd.read_csv(data_dir+'COL1538-V4-2.csv',low_memory=0)
e1.head(5)

e1[e1['chr']=='1'].head(5) #from 0 to 5-1

u=pd.DataFrame({'chr':['1','x','y'],'value':['val1','val2','val3']}) #example of a df creation
u

chr_e2=['1','2']
e2_filter=[x in chr_e2 for x in tuple(e1['chr'])] #search for list comprehension vs generator
#print(e2_filter)
e2=e1[e2_filter]
e2.tail(5)

chr_e3=['1','12']
e3_filter=[x in chr_e3 for x in tuple(e1['chr'])] #generator list generator
#print(e3_filter)
e3=e1[e3_filter]
print(e3.columns)
e3.tail(5)

Index(['chr', 'pos', 'ID', 'refNt', 'altNt', 'function', 'gene', 'transcrptID',
       'is_CCDS', 'is_RefSeq', 'AAChange', 'AnnoOther', 'zygo', 'BL',
       'gnomADGenomes_AF', 'gnomADEx_AF', 'ESP6500All_AF', 'gnomADEx_AFR_AF',
       'gnomADEx_AMR_AF', 'gnomADEx_EAS_AF', 'gnomADEx_FIN_AF',
       'gnomADEx_NFE_AF', 'gnomADEx_OTH_AF', 'gnomADEx_SAS_AF',
       'MiddleEast_AF', 'Sift_Pred', 'Polyphen2HVAR_Pred', 'CADD_Phred_V1.3',
       'MSC_99%', 'MSC_99%_Pred', 'GDI', 'varQual', 'AD', 'DP', 'MQ', 'QD',
       'Clinvar_AlleleID', 'KnownPID', 'PredictedPID', 'P-value', 'Route',
       'gnomADGen_SegDup'],
      dtype='object')

Subsets combinations¶

available sets: e1,e2,e3 (exoms)

possible subsets:

${e_{_{i}}}\cap\ {e_{_{j}}}\cap\ {e_{_{k}}} \ |_{i,j,k\in \{A,B,C\}\ \wedge\ i\neq j\neq k}$
${e_{i}}\cap\ {e_{j}}\cap \ {{e^c}_{k}} \ |_{i,j,k\in \{A,B,C\}\ \wedge\ i\neq j\neq k}$
${e_{i}}\cap \ {{e^c}_{j}} \cap \ {{e^c}_{k}} \ |_{i,j,k\in \{A,B,C\}\ \wedge\ i\neq j\neq k}$

Figure 1. Venn diagram with C as the pacient, A as affected parent and B as the unaffected parent.

Requirements:

y
must match a triplet of (pos, ref nt, gene)

apriori_common=len(set(list(e1[e1['chr']=='1']['gene'])))
apriori_unique_e2=len(set(list(e2[e2['chr']=='2']['gene'])))
apriori_unique_e3=len(set(list(e3[e3['chr']=='12']['gene'])))
print("len(apriori_common)={}".format(apriori_common))
print("len(apriori_unique_e2)={}".format(apriori_unique_e2))
print("len(apriori_unique_e3)={}".format(apriori_unique_e3))

len(apriori_common)=1973
len(apriori_unique_e2)=1286
len(apriori_unique_e3)=1040

genes_e1=set(list(e1['gene']))
genes_e2=set(list(e2['gene']))
genes_e3=set(list(e3['gene']))

def searchforgene(gene,genes_set):
  positions=[]
  genes=[]
  for i in range(len(genes_set)):
    if gene==genes_set[i]:
      positions.append(i)
      genes.append(genes_set[i])
  return (positions,genes)
# test
print(list(e2['gene'])[:20])
print(len(list(e2['gene'])))
print(searchforgene('DDX11L1',list(genes_e2))[0])
print(searchforgene('DDX11L1',list(genes_e2))[1])
print(searchforgene('DDX11L1',list(e2['gene']))[0])

['DDX11L1', 'DDX11L1', 'DDX11L1', 'WASH7P', 'OR4F5', 'AL627309.1', 'RP11-206L10.10', 'RP11-206L10.10', 'LINC00115', 'LINC00115', 'LINC00115', 'LINC00115', 'LINC00115', 'LINC00115', 'LINC01128', 'LINC01128', 'LINC01128', 'LINC01128', 'FAM41C', 'TUBB8P11']
22507
[132]
['DDX11L1']
[0, 1, 2]

common=genes_e1.intersection(genes_e2).intersection(genes_e3)
len(common)

1973

The result shown above, shows that len(a_priori_common) is equal to the z subset (see fig. 1).

Constructing the generic function¶

Selecting individual tuples from the exomes files : extract_tuples_pro function¶

def extract_tuples_pro(e,prop=("gene","pos","refNt")):
    """
    Must extract the desired parameters from the exome "e"
    in a list of tuples(default triples)
    """
    return e[list(prop)].values

def extract_tuples(e,prop=("gene","pos","refNt")):
    """
    Must extract the desired parameters from the exome "e"
    in a list of tuples(default triples)
    """
    r={}
    for i in range(len(prop)):
        r[prop[i]]=tuple(e[prop[i]])

    # rr[0]=("a_gen","a_pos","a_refNt")
    rr=[]
    for i in range(len(r[prop[0]])):
        rri=[r[j][i] for j in prop]#list comprehension
        rr.append(rri)           

    return r,rr

# test
print(extract_tuples(e2)[1][:5])
print(len(extract_tuples(e2)[0]["gene"]))
print(len(extract_tuples(e2)[0]["pos"]))
print(len(extract_tuples(e2)[0]["refNt"]))

[['DDX11L1', 13273, 'G'], ['DDX11L1', 13417, 'C'], ['DDX11L1', 13656, 'CAG'], ['WASH7P', 15211, 'T'], ['OR4F5', 69511, 'A']]
22507
22507
22507

Splitting by subsets : subset function¶

def subset(Ax,Bx,Cx,prop=("gene","pos","refNt")):
    """
    Must obtain the different subsets to contruct the Venn
    diagram.
    A and B: parents
    C: Child
    """
    A=frozenset(tuple(m) for m in extract_tuples_pro(Ax,prop)) #as set 1
    B=frozenset(tuple(m) for m in extract_tuples_pro(Bx,prop)) #as set 2
    C=frozenset(tuple(m) for m in extract_tuples_pro(Cx,prop)) #as set 3

    a=((A-B)-C)             #001
    b=((B-A)-C)             #010
    c=((C-B)-A)             #100
    y=((A&C)-B)             #101
    w=((A&B)-C)             #011
    x=((B&C)-A)             #110
    z=A.intersection(B,C)   #111

    combinations=[a,b,w,c,y,x,z]
    merge_vector=[bin(i)[2:].zfill(3) for i in range (1,2**3)]
    r=dict(zip(merge_vector,combinations))
    return r

# debug
merge_vector=[bin(i)[2:].zfill(3) for i in range (1,2**3)]
print(merge_vector)

['001', '010', '011', '100', '101', '110', '111']

Venn Diagram¶

# Venn Diagramm - ilustrative example

A,B,C=set('23 xk678'),set('13 ykjl'),set('12 zkjl678')
Vtags=[A,B,C]

a=((A-B)-C)             #001
b=((B-A)-C)             #010
c=((C-B)-A)             #100
y=((A&C)-B)             #101
w=((A&B)-C)             #011
x=((B&C)-A)             #110
z=A.intersection(B,C)   #111


combinations=[len(i) for i in [a,b,w,c,y,x,z]]
print(combinations)
merge_vector=[bin(i)[2:].zfill(3) for i in range (1,2**3)]
r=dict(zip(merge_vector,combinations))
T=[a,b,w,c,y,x,z]
ñ=dict(zip(merge_vector,T))
print(r)
print(merge_vector)
#(Abc, aBc, ABc, abC, AbC, aBC, ABC)
tags=['a','b','w','c','y','x','z']
q=dict(zip(merge_vector,tags))
plt.figure(figsize=(10,8))
v=venn3(combinations,Vtags)
for i in merge_vector:
    try:
        t=v.get_label_by_id(i).get_text()
        v.get_label_by_id(i).set_text(f"id:{i}\n{q[i[::-1]]}:{t}\n{ñ[i[::-1]]}")
    except:
        print("debuging bro, testing")

plt.show()

[1, 1, 1, 1, 4, 3, 2]
{'001': 1, '010': 1, '011': 1, '100': 1, '101': 4, '110': 3, '111': 2}
['001', '010', '011', '100', '101', '110', '111']

Venn Function¶

def venn(r,Vtags=('A','B','C')):
    plt.figure(figsize=(10,8))
    #(Abc, aBc, ABc, abC, AbC, aBC, ABC) <- format of venn3
    # str[::-1] is used to flip strings
    mv=[bin(i)[2:].zfill(3) for i in range (1,2**3)]
    tags=['a','b','w','c','y','x','z']
    q=dict(zip(mv,tags))
    v=venn3([len(r[i]) for i in mv],Vtags)
    for i in mv:
        t=v.get_label_by_id(i).get_text()
        v.get_label_by_id(i).set_text(f"id:{i}\n{q[i[::-1]]}\n{t}")
    plt.show()
    return None

Implementing and testing the function with sets¶

# a_priori test
prop=("gene","pos","refNt")
apriori_common=len(e1[list(prop)])
print("len(apriori_common)={}".format(apriori_common))
print(e1[list(prop)].values[:5])
e1[list(prop)].head(5)

len(apriori_common)=138013
[['DDX11L1' 13273 'G']
 ['DDX11L1' 13417 'C']
 ['DDX11L1' 13656 'CAG']
 ['WASH7P' 15211 'T']
 ['OR4F5' 69511 'A']]

A=frozenset(tuple(u) for u in extract_tuples(e1)[1])
B=frozenset(tuple(u) for u in extract_tuples(e2)[1])
C=frozenset(tuple(u) for u in extract_tuples(e3)[1])
z=A.intersection(B,C)
print(len(z))
y=len((A&C)-B)
print(y)

13076
7222

# proving the triplets actually match in the analysed case (z)
aux1=e1[["chr","gene","pos","refNt"]]
print(type(aux1))
aux1=aux1[aux1['chr']=='1']
print(type(aux1))
print(len(aux1))
aux1=aux1[["gene","pos","refNt"]].values
aux1=frozenset(tuple(u) for u in aux1)
print(len(aux1))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
13086
13076

Time test¶

# time test
%timeit -n 10 -r 5 extract_tuples(e1)

10 loops, best of 5: 107 ms per loop

%timeit -n 10 -r 5 extract_tuples_pro(e1)

10 loops, best of 5: 12.1 ms per loop

As a conclusion, the function extract_list_pro spent about 10% of the time spent by the extract_tuples function

Extending to the real case¶

e1=pd.read_csv(data_dir+'COL1541-V4-2.csv',low_memory=0) #father
e2=pd.read_csv(data_dir+'COL1539-V4-2.csv',low_memory=0) #mother
e3=pd.read_csv(data_dir+'COL1538-V4-2.csv',low_memory=0) #child

Example 1: prop=("gene","pos","refNt") triplet¶

There is no need to call prop=("gene","pos","refNt") as this is a default argument of the subset function.

r=subset(e1,e2,e3)
venn(r,Vtags=("Parent(A) : Symptomatic","Parent(B) : Asymptomatic","Child(C) : Symptomatic"))#217997

Data$\rightarrow$DataFrame¶

Because each subset is formed of unique tuples the DataFrame constructor must be used (then if desired, convert it to excel. Also to alocate the desired subset, the id displayed in the section Example 1 must be used as a key to access every element of the dictionary.

prop=("gene","pos","refNt")
pd.DataFrame(r["111"],columns=prop).head(5)

Testing¶

print(f"len(r['101'])={len(r['101'])}")
print(f"len(r['101']&r['001'])={len(r['100'].intersection(r['001']))}")

len(r['101'])=26567
len(r['101']&r['001'])=0

Note that len(r['101']&r['001'])=0 makes sense due to the fact that the subsets are already separed

# testing subset function with Venn diagram 'created' from scratch 
At=frozenset(tuple(m) for m in extract_tuples_pro(e1))
Bt=frozenset(tuple(m) for m in extract_tuples_pro(e2))
Ct=frozenset(tuple(m) for m in extract_tuples_pro(e3))
a=((At.difference(Bt)).difference(Ct))             #001
b=((Bt.difference(At)).difference(Ct))             #010
c=((Ct.difference(Bt)).difference(At))             #100
y=((At.intersection(Ct)).difference(Bt))           #101
w=((At.intersection(Bt)).difference(Ct))           #011
x=((Bt.intersection(Ct)).difference(At))           #110
z=At.intersection(Bt,Ct)                           #111
venn3(subsets=[len(i) for i in [a,b,w,c,y,x,z]])

<matplotlib_venn._common.VennDiagram at 0x7ff1ee45bd68>

Example 2: Custom prop parameter¶

Setting prop=("gene") will establish a different filter for the analysis

r2=subset(e1,e2,e3,prop=("gene",))
venn(r2,Vtags=("Parent(A) : Symptomatic","Parent(B) : Asymptomatic","Child(C) : Symptomatic"))

Example 3: Custom prop parameter¶

Setting prop=("pos") will establish a different filter for the analysis

r3=subset(e1,e2,e3,prop=("pos",))
venn(r3,Vtags=("Parent(A) : Symptomatic","Parent(B) : Asymptomatic","Child(C) : Symptomatic"))#217906

Example 4: Custom prop parameter¶

r4=subset(e1,e2,e3,prop=("gene","pos","refNt","altNt"))
venn(r4,Vtags=("Parent(A) : Symptomatic","Parent(B) : Asymptomatic","Child(C) : Symptomatic"))#218853

Data $\rightarrow$ DataFrame¶

prop2=("gene","pos","refNt","altNt")
pato=pd.DataFrame(r4["101"],columns=prop2).head(5)
pato

pato.to_csv()

',gene,pos,refNt,altNt\n0,ADCY8,131880292,T,A\n1,HDHD1,6976113,C,T\n2,ATP2B2,10400253,C,T\n3,LHB,49519855,T,G\n4,NRK,105132142,C,A\n'

Application: Comparison between exomes analysis¶

#loading files
A=pd.read_csv(root_dir+'data-us/COL1541.V4.2.gnomGN_0.05.csv',low_memory=0) #father
B=pd.read_csv(root_dir+'data-us/COL1539.V4.2.gnomGN_0.05.csv',low_memory=0) #mother
C=pd.read_csv(root_dir+'data-us/COL1538.V4.2.gnomGN_0.05.csv',low_memory=0) #child
#plotting venn
prop_=("gene","pos","refNt","altNt")#illustrative
QQ=subset(A,B,C,prop=prop_)
venn(QQ,Vtags=("Parent A (COL1541)","Parent B (COL1539)","Child C (COL1538)"))

Data $\rightarrow$ DataFrame¶

duck=pd.DataFrame(QQ["011"],columns=prop_).head(10)
duck

	chr	pos	ID	refNt	altNt	function	gene	transcrptID	is_CCDS	is_RefSeq	AAChange	AnnoOther	zygo	BL	gnomADGenomes_AF	gnomADEx_AF	ESP6500All_AF	gnomADEx_AFR_AF	gnomADEx_AMR_AF	gnomADEx_EAS_AF	gnomADEx_FIN_AF	gnomADEx_NFE_AF	gnomADEx_OTH_AF	gnomADEx_SAS_AF	MiddleEast_AF	Sift_Pred	Polyphen2HVAR_Pred	CADD_Phred_V1.3	MSC_99%	MSC_99%_Pred	GDI	varQual	AD	DP	MQ	QD	Clinvar_AlleleID	KnownPID	PredictedPID	P-value	Route	gnomADGen_SegDup
0	1	13273	rs531730856	G	C	nc_mirna	DDX11L1	ENST00000456328	no	no	NaN	intron(DDX11L1:ENST00000518655);nc_mirna(DDX11...	het	no	0.102748	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2.42	NaN	NaN	NaN	190	13,11	24	25.1	7.9	NaN	NO	NO	NaN	NaN	yes
1	1	13417	rs777038595	C	CGAGA	nc_mirna	DDX11L1	ENST00000456328	no	no	NaN	intron(DDX11L1:ENST00000450305);nc_mirna(DDX11...	het	no	0.066620	0.112528	NaN	0.015301	0.0	0.055929	0.175127	0.128360	0.134288	0.156572	NaN	NaN	NaN	4.01	NaN	NaN	NaN	17	10,2	12	27.8	1.2	NaN	NO	NO	NaN	NaN	yes
2	1	13656	rs1263393206	CAG	C	essential_splicing	DDX11L1	ENST00000518655	no	no	NaN	nc_mirna(DDX11L1:ENST00000456328);nc_mirna(DDX...	het	no	0.489766	0.029533	NaN	0.006352	0.0	0.003094	0.061983	0.048217	0.030691	0.035543	NaN	NaN	NaN	5.77	NaN	NaN	NaN	35	4,2	6	23.5	5.8	NaN	NO	NO	NaN	NaN	yes
3	1	15211	rs78601809	T	G	intron	WASH7P	ENST00000438504	no	no	NaN	downstream(DDX11L1:ENST00000456328);downstream...	hom	no	0.609542	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2.94	NaN	NaN	NaN	128	0,6	6	25.0	21.3	NaN	NO	NO	NaN	NaN	yes
4	1	69511	rs75062661	A	G	missense	OR4F5	ENST00000335137	yes	yes	p.Thr141Ala	NaN	hom	no	0.843526	0.949691	0.240234	0.607496	0.0	0.999439	0.991577	0.972624	0.950621	0.985395	159:76:429	.,T	B	0.05	2.31	LOW	2.32	1237	0,43	43	31.2	28.8	NaN	NO	NO	NaN	NaN	yes

	chr	pos	ID	refNt	altNt	function	gene	transcrptID	is_CCDS	is_RefSeq	AAChange	AnnoOther	zygo	BL	gnomADGenomes_AF	gnomADEx_AF	ESP6500All_AF	gnomADEx_AFR_AF	gnomADEx_AMR_AF	gnomADEx_EAS_AF	gnomADEx_FIN_AF	gnomADEx_NFE_AF	gnomADEx_OTH_AF	gnomADEx_SAS_AF	MiddleEast_AF	Sift_Pred	Polyphen2HVAR_Pred	CADD_Phred_V1.3	MSC_99%	MSC_99%_Pred	GDI	varQual	AD	DP	MQ	QD	Clinvar_AlleleID	KnownPID	PredictedPID	P-value	Route	gnomADGen_SegDup
0	1	13273	rs531730856	G	C	nc_mirna	DDX11L1	ENST00000456328	no	no	NaN	intron(DDX11L1:ENST00000518655);nc_mirna(DDX11...	het	no	0.102748	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2.42	NaN	NaN	NaN	190	13,11	24	25.1	7.9	NaN	NO	NO	NaN	NaN	yes
1	1	13417	rs777038595	C	CGAGA	nc_mirna	DDX11L1	ENST00000456328	no	no	NaN	intron(DDX11L1:ENST00000450305);nc_mirna(DDX11...	het	no	0.066620	0.112528	NaN	0.015301	0.0	0.055929	0.175127	0.128360	0.134288	0.156572	NaN	NaN	NaN	4.01	NaN	NaN	NaN	17	10,2	12	27.8	1.2	NaN	NO	NO	NaN	NaN	yes
2	1	13656	rs1263393206	CAG	C	essential_splicing	DDX11L1	ENST00000518655	no	no	NaN	nc_mirna(DDX11L1:ENST00000456328);nc_mirna(DDX...	het	no	0.489766	0.029533	NaN	0.006352	0.0	0.003094	0.061983	0.048217	0.030691	0.035543	NaN	NaN	NaN	5.77	NaN	NaN	NaN	35	4,2	6	23.5	5.8	NaN	NO	NO	NaN	NaN	yes
3	1	15211	rs78601809	T	G	intron	WASH7P	ENST00000438504	no	no	NaN	downstream(DDX11L1:ENST00000456328);downstream...	hom	no	0.609542	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2.94	NaN	NaN	NaN	128	0,6	6	25.0	21.3	NaN	NO	NO	NaN	NaN	yes
4	1	69511	rs75062661	A	G	missense	OR4F5	ENST00000335137	yes	yes	p.Thr141Ala	NaN	hom	no	0.843526	0.949691	0.240234	0.607496	0.0	0.999439	0.991577	0.972624	0.950621	0.985395	159:76:429	.,T	B	0.05	2.31	LOW	2.32	1237	0,43	43	31.2	28.8	NaN	NO	NO	NaN	NaN	yes

	chr	pos	ID	refNt	altNt	function	gene	transcrptID	is_CCDS	is_RefSeq	AAChange	AnnoOther	zygo	BL	gnomADGenomes_AF	gnomADEx_AF	ESP6500All_AF	gnomADEx_AFR_AF	gnomADEx_AMR_AF	gnomADEx_EAS_AF	gnomADEx_FIN_AF	gnomADEx_NFE_AF	gnomADEx_OTH_AF	gnomADEx_SAS_AF	MiddleEast_AF	Sift_Pred	Polyphen2HVAR_Pred	CADD_Phred_V1.3	MSC_99%	MSC_99%_Pred	GDI	varQual	AD	DP	MQ	QD	Clinvar_AlleleID	KnownPID	PredictedPID	P-value	Route	gnomADGen_SegDup
22502	2	242707101	rs6756901	A	G	intron	D2HGDH	ENST00000321264	yes	yes	NaN	NaN	het	no	0.603751	0.518218	0.382489	0.871115	0.0	0.585605	0.470520	0.501298	0.519070	0.458265	NaN	NaN	NaN	5.22	0.09	HIGH	11.04	644	29,24	53	60.0	12.2	168071.0	NO	NO	NaN	NaN	no
22503	2	242707477	rs11552660	T	C	utr_3	D2HGDH	ENST00000321264	yes	yes	NaN	NaN	het	no	0.111900	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2.44	0.09	HIGH	11.04	105	1,4	5	60.0	21.0	286302.0	NO	NO	NaN	NaN	no
22504	2	242708045	rs113372064	C	T	utr_3	D2HGDH	ENST00000321264	yes	yes	NaN	NaN	hom	no	0.073784	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	5.46	0.09	HIGH	11.04	52	0,2	2	60.0	25.9	286337.0	NO	NO	NaN	NaN	no
22505	2	242743651	rs744986	G	A	upstream	AC114730.5	ENST00000437438	no	no	NaN	downstream(GAL3ST2:ENST00000192314)	het	no	0.348165	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2.33	NaN	NaN	NaN	145	3,5	8	60.0	18.1	NaN	NO	NO	NaN	NaN	no
22506	2	242813124	rs13016102	T	C	intron	CXXC11	ENST00000343216	yes	yes	NaN	NaN	hom	no	0.989166	0.996225	NaN	0.970301	0.0	1.000000	0.995157	0.996347	0.996717	0.999911	NaN	NaN	NaN	1.94	2.31	LOW	9.35	1920	0,57	57	60.0	33.7	NaN	NO	NO	NaN	NaN	no

	chr	pos	ID	refNt	altNt	function	gene	transcrptID	is_CCDS	is_RefSeq	AAChange	AnnoOther	zygo	BL	gnomADGenomes_AF	gnomADEx_AF	ESP6500All_AF	gnomADEx_AFR_AF	gnomADEx_AMR_AF	gnomADEx_EAS_AF	gnomADEx_FIN_AF	gnomADEx_NFE_AF	gnomADEx_OTH_AF	gnomADEx_SAS_AF	MiddleEast_AF	Sift_Pred	Polyphen2HVAR_Pred	CADD_Phred_V1.3	MSC_99%	MSC_99%_Pred	GDI	varQual	AD	DP	MQ	QD	Clinvar_AlleleID	KnownPID	PredictedPID	P-value	Route	gnomADGen_SegDup
90071	12	133797649	rs147515371	T	G	intron	ANHX	ENST00000419717	yes	yes	NaN	intron(ANHX:ENST00000545940);upstream(AC226150...	hom	no	0.755861	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	8.86	2.31	HIGH	11.48	1130	0,32	32	60.0	24.6	NaN	NO	NO	NaN	NaN	no
90072	12	133803075	rs146938968	C	A	intron	ANHX	ENST00000419717	yes	yes	NaN	intron(ANHX:ENST00000545940)	het	no	0.121501	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	11.46	2.31	HIGH	11.48	506	12,16	28	60.0	17.4	NaN	NO	NO	NaN	NaN	no
90073	12	133808270	rs142638257	C	T	intron	ANHX	ENST00000419717	yes	yes	NaN	intron(ANHX:ENST00000545940)	het	no	0.057811	0.051146	NaN	0.041920	0.0	0.003437	0.081753	0.065813	0.060494	0.053098	NaN	NaN	NaN	6.49	2.31	HIGH	11.48	551	17,15	32	60.0	17.2	NaN	NO	NO	NaN	NaN	no
90074	12	133808275	rs150558535	G	C	intron	ANHX	ENST00000419717	yes	yes	NaN	intron(ANHX:ENST00000545940)	het	no	0.058017	0.051031	NaN	0.041947	0.0	0.003436	0.081648	0.065776	0.060614	0.053132	NaN	NaN	NaN	5.94	2.31	HIGH	11.48	557	13,15	28	60.0	19.9	NaN	NO	NO	NaN	NaN	no
90075	12	133812415	rs141289920	C	T	utr_5	ANHX	ENST00000545940	yes	yes	NaN	upstream(ANHX:ENST00000419717);downstream(RNU6...	hom	no	0.800103	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	7.43	2.31	HIGH	11.48	22	0,2	2	60.0	10.9	NaN	NO	NO	NaN	NaN	no

	gene	pos	refNt
0	HAUS6	19086979	C
1	CMAHP	25106745	A
2	CCDC88C	91751808	G
3	MTMR12	32242381	G
4	RHO	129247526	G

	gene	pos	refNt	altNt
0	ADCY8	131880292	T	A
1	HDHD1	6976113	C	T
2	ATP2B2	10400253	C	T
3	LHB	49519855	T	G
4	NRK	105132142	C	A

	gene	pos	refNt	altNt
0	PTGER4P2	66499841	T	C
1	HLA-DQB1	32633009	T	C
2	ZNF596	194612	A	T
3	ZNF766	52786700	CT	C
4	BRWD1	40636351	C	T
5	NaN	10793861	A	G
6	HLA-DRB5	32497885	C	T
7	LINC00273	33965672	C	T
8	TRBV6-1	142028106	C	G
9	CDC42BPG	64594099	T	G

	chr	value
0	1	val1
1	x	val2
2	y	val3