---
title: Genodata module
keywords: fastai
sidebar: home_sidebar
summary: "read and extract genodata"
description: "read and extract genodata"
nb_path: "00_Genodata.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

read_bgen[source]

read_bgen(file, sample_file=None)

The function to read genotype data

{% endraw %} {% raw %}
{% endraw %} {% raw %}

bgen2dask[source]

bgen2dask(bgen, index, step=500)

The function to convert bgen to a dask array

{% endraw %} {% raw %}
{% endraw %} {% raw %}

extract_bed[source]

extract_bed(geno, idx, row=True, step=500)

{% endraw %} {% raw %}
{% endraw %} {% raw %}

class Genodata[source]

Genodata(geno_path, sample_path=None)

{% endraw %} {% raw %}
{% endraw %}

Test

{% raw %}
geno_path ='/home/dmc2245/UKBiobank/data/exome_files/project_VCF/072721_run/plink/ukb23156_c1.merged.filtered.bed'
{% endraw %} {% raw %}
/home/dmc2245/UKBiobank/data/exome_files/project_VCF/072721_run/plink/092321_UKBB_qc_exome_geno_path.txt
{% endraw %} {% raw %}
region = [5,272741,1213528-900000]
geno_path = 'MWE_region_extraction/ukb23156_c5.merged.filtered.5_272741_1213528.bed'
sumstats_path = 'MWE_region_extraction/090321_UKBB_Hearing_aid_f3393_expandedwhite_6436cases_96601ctrl_PC1_2_f3393.regenie.snp_stats'
pheno_path = None
unr_path = 'MWE_region_extraction/UKB_genotypedatadownloaded083019.090221_sample_variant_qc_final_callrate90.filtered.extracted.white_europeans.filtered.092821_ldprun_unrelated.filtered.prune.txt'
imp_geno_path = 'MWE_region_extraction/ukb_imp_chr5_v3_05_272856_1213643.bgen'
imp_sumstats_path = 'MWE_region_extraction/100521_UKBB_Hearing_aid_f3393_expandedwhite_15601cases_237318ctrl_500k_PC1_PC2_f3393.regenie.snp_stats'
imp_ref = 'hg19'

output_sumstats = 'test.snp_stats'
output_LD = 'test_corr.csv'

#main(region,geno_path,sumstats_path,pheno_path,unr_path,imp_geno_path,imp_sumstats_path,imp_ref,output_sumstats,output_LD)
{% endraw %} {% raw %}
from pandas_plink import Chunk
{% endraw %} {% raw %}
Chunk(512,512)
Chunk(nsamples=512, nvariants=512)
{% endraw %} {% raw %}
exome_geno.extractbyvariants(exome_geno.bim.snp[:50])
{% endraw %} {% raw %}
exome_geno.extractbysamples(exome_geno.fam.iid[:60])
{% endraw %}