--- title: Liftover module keywords: fastai sidebar: home_sidebar summary: "Liftover genodata and sumstat" description: "Liftover genodata and sumstat" nb_path: "02_Liftover.ipynb" ---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

class Liftover[source]

Liftover(fr='hg19', to='hg38')

{% endraw %} {% raw %}
{% endraw %} {% raw %}
lf = Liftover('hg19','hg38')
{% endraw %} {% raw %}
vcf ='data/GH.AR.SAD.P1.001.0_X3547_S42_1180478_GVCF.hard-filtered.gvcf.gz'
{% endraw %} {% raw %}
lf.region_liftover([5,272741,1213528-900000])
(5, 272626, 313413)
{% endraw %} {% raw %}
lf.vcf_liftover(vcf)
{% endraw %}

Test liftover of GWAS summary statistics (sumstat)

{% raw %}
from LDtools.sumstat import Sumstat
from LDtools.liftover import Liftover
def gwas_liftover(input_path,output_path,output_unmapped,output_mapped,fr='hg19',to='hg38',remove_missing=False):
    """Lift a GWAS summary-statistics file over to another genome build.

    The file is read with ``Sumstat``, its ``ss`` table is converted with
    ``Liftover.sumstat_liftover`` and the result is written as gzip-compressed,
    tab-separated text.

    Parameters
    ----------
    input_path : path of the summary-statistics file to read.
    output_path : destination of the full lifted table; only written when
        ``remove_missing`` is False.
    output_unmapped : destination of the rows whose ``CHR`` is 0; only written
        when ``remove_missing`` is True.
    output_mapped : destination of the rows whose ``CHR`` is not 0; only
        written when ``remove_missing`` is True.
    fr, to : source and target genome builds handed to ``Liftover``.
    remove_missing : when True, split the result into mapped / unmapped files
        instead of writing a single combined file.
    """
    lf = Liftover(fr, to)
    print("reading GWAS sumstat")
    sums = Sumstat(input_path)
    # The original message concatenated the builds without any spaces
    # ("liftover fromhg19tohg38"); keep the wording but make it readable.
    print(f"liftover from {fr} to {to}")
    lifted = lf.sumstat_liftover(sums.ss)

    # Every output file shares the same on-disk format.
    csv_options = dict(compression='gzip', sep="\t", header=True, index=False)
    if remove_missing:
        lifted[lifted.CHR == 0].to_csv(output_unmapped, **csv_options)
        lifted[lifted.CHR != 0].to_csv(output_mapped, **csv_options)
    else:
        lifted.to_csv(output_path, **csv_options)
{% endraw %} {% raw %}
def gwas_liftover(input_file,output_path=None,fr='hg19',to='hg38',remove_missing=False):
    """Lift a GWAS summary-statistics file over, deriving the output names.

    Parameters
    ----------
    input_file : path of the summary-statistics file to read.
    output_path : where to write the result.
        * ``None`` (default): the directory that contains ``input_file``.
        * an existing directory, or a path ending in a path separator: the
          lifted file is created inside it as
          ``<input name without its last extension>.<to>.sumstats.gz``.
        * anything else: used verbatim as the lifted file path.
    fr, to : source and target genome builds handed to ``Liftover``.
    remove_missing : when False, write one combined file. When True, write two
        files next to the lifted path instead, named by replacing a trailing
        ``.gz`` with ``_unmapped.gz`` (rows with ``CHR == 0``) and
        ``_mapped.gz`` (all other rows).

    All files are gzip-compressed, tab-separated text.
    """
    # Imported locally so the function does not depend on a later notebook
    # cell having imported ``os`` first.
    import os

    if output_path is None:
        target_dir = os.path.dirname(input_file)
    elif os.path.isdir(output_path) or os.fspath(output_path).endswith(('/', os.sep)):
        target_dir = os.fspath(output_path)
    else:
        target_dir = None

    if target_dir is None:
        lifted_path = os.fspath(output_path)
    else:
        stem = os.path.splitext(os.path.basename(input_file))[0]
        # os.path.join copes with an empty directory (input in the current
        # working directory), where "dirname + '/'" would point at the root.
        lifted_path = os.path.join(target_dir, f"{stem}.{to}.sumstats.gz")

    root = lifted_path[:-3] if lifted_path.endswith('.gz') else lifted_path
    unmapped_path = f"{root}_unmapped.gz"
    mapped_path = f"{root}_mapped.gz"

    # Honour the requested builds instead of a hard-coded hg19 -> hg38.
    lf = Liftover(fr, to)
    print("reading GWAS sumstat")
    sums = Sumstat(input_file)
    print(f"liftover from {fr} to {to}")
    lifted = lf.sumstat_liftover(sums.ss)

    csv_options = dict(compression='gzip', sep="\t", header=True, index=False)
    if remove_missing:
        lifted[lifted.CHR == 0].to_csv(unmapped_path, **csv_options)
        lifted[lifted.CHR != 0].to_csv(mapped_path, **csv_options)
    else:
        lifted.to_csv(lifted_path, **csv_options)
{% endraw %} {% raw %}
sumstats_lifted = f'{cwd}/{_input:bnn}.hg38.sumstats.gz',
sumstats_unmapped = f'{cwd}/{_input:bnn}.hg38.sumstats_unmapped.gz',
sumstats_mapped = f'{cwd}/{_input:bnn}.hg38.sumstats_mapped.gz'
{% endraw %} {% raw %}
import os
{% endraw %} {% raw %}
tmp = os.path.basename(input_path)
{% endraw %} {% raw %}
os.path.splitext(tmp)
('100521_UKBB_Combined_f2247_f2257_expandedwhite_93258cases_237318ctrl_500k_PC1_PC2_f2247_f2257.regenie.snp_stats',
 '.gz')
{% endraw %} {% raw %}
os.path.dirname(input_path)+'/'
'/home/dmc2245/UKBiobank/results/REGENIE_results/results_imputed_data/2021_10_07_combined_500K/'
{% endraw %} {% raw %}
sums = Sumstat(input_path)
{% endraw %} {% raw %}
lf = Liftover('hg19','hg38')
{% endraw %} {% raw %}
lf.sumstat_liftover(sums.ss[:10])
CHR POS REF ALT SNP BETA SE P
0 1 13259 G A chr1:13259:G:A 0.434586 0.175780 0.014801
1 1 17569 C A chr1:17569:C:A -0.030568 0.795968 0.969366
2 1 17641 G A chr1:17641:G:A -0.078881 0.108663 0.467883
3 1 30741 C A chr1:30741:C:A -1.599610 0.990472 0.044798
4 1 57222 T C chr1:57222:T:C 0.031666 0.121422 0.794253
5 1 58396 T C chr1:58396:T:C 0.366266 0.172004 0.035663
6 1 62157 G A chr1:62157:G:A -0.147251 0.296105 0.618983
7 1 62595 C T chr1:62595:C:T 0.356993 0.171623 0.040096
8 1 69487 G A chr1:69487:G:A -0.559373 0.853882 0.512407
9 1 69569 T C chr1:69569:T:C 0.232690 0.216585 0.282662
{% endraw %} {% raw %}
def main(input_path,output_path,remove_missing):
    """Read a REGENIE result file, lift it over and write it as a TSV.

    Parameters
    ----------
    input_path : file passed to ``read_regenie``.
    output_path : destination of the tab-separated output.
    remove_missing : when True, rows with ``CHR == 0`` are dropped before
        writing; when False the full lifted table is written.
    """
    sums = read_regenie(input_path)
    lifted = sumstat_liftover(sums)
    if remove_missing:
        # CHR == 0 is treated as "unmapped" by gwas_liftover in this notebook.
        # The previous filter (CHR == 0) kept exactly the rows that were
        # supposed to be removed, so select the complement instead.
        lifted = lifted[lifted.CHR != 0]
    lifted.to_csv(output_path, sep="\t", header=True, index=False)
{% endraw %} {% raw %}
input_path = '/home/dmc2245/UKBiobank/results/REGENIE_results/results_imputed_data/2021_10_07_combined_500K/100521_UKBB_Combined_f2247_f2257_expandedwhite_93258cases_237318ctrl_500k_PC1_PC2_f2247_f2257.regenie.snp_stats.gz'
output_path = ''
remove_missing = True
{% endraw %} {% raw %}
main(input_path,output_path,remove_missing)
{% endraw %} {% raw %}
':'.join([1,'1'])
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/tmp/1967998.1.high_mem.q/ipykernel_39087/731612175.py in <module>
----> 1 ':'.join([1,'1'])

TypeError: sequence item 0: expected str instance, int found
{% endraw %}