---
title: Liftover module
keywords: fastai
sidebar: home_sidebar
summary: "Liftover genodata and sumstat"
description: "Liftover genodata and sumstat"
nb_path: "02_Liftover.ipynb"
---
# Example usage: build a Liftover from hg19 to hg38 and apply it to a region and to a gVCF file.
# NOTE(review): `Liftover` is imported further down in this file (notebook cell order) —
# confirm the import runs before this cell when executing the file top to bottom.
lf = Liftover('hg19','hg38')
vcf ='data/GH.AR.SAD.P1.001.0_X3547_S42_1180478_GVCF.hard-filtered.gvcf.gz'
# NOTE(review): the list is presumably [chromosome, start, end] — confirm against Liftover.region_liftover.
lf.region_liftover([5,272741,1213528-900000])
lf.vcf_liftover(vcf)
from LDtools.sumstat import Sumstat
from LDtools.liftover import Liftover
def gwas_liftover(input_path,output_path,output_unmapped,output_mapped,fr='hg19',to='hg38',remove_missing=False):
    """Lift a GWAS summary-statistics file over from one genome build to another.

    The file is loaded with ``Sumstat``, its ``ss`` table is passed through
    ``Liftover(fr, to).sumstat_liftover`` and the result is written as
    gzip-compressed, tab-separated text with a header row and no index column.

    Args:
        input_path: Path handed to ``Sumstat`` to load the summary statistics.
        output_path: Destination of the complete lifted table. Only written
            when ``remove_missing`` is false.
        output_unmapped: Destination of the rows whose ``CHR`` equals 0 after
            liftover. Only written when ``remove_missing`` is true.
        output_mapped: Destination of the rows whose ``CHR`` differs from 0
            after liftover. Only written when ``remove_missing`` is true.
        fr: Source genome build passed to ``Liftover``.
        to: Target genome build passed to ``Liftover``.
        remove_missing: When true, split the result into the unmapped/mapped
            files instead of writing a single combined file.

    Returns:
        None. The results are written to disk.
    """
    lf = Liftover(fr, to)
    print("reading GWAS sumstat")
    sums = Sumstat(input_path)
    # The original message concatenated the builds without spaces
    # ("liftover fromhg19tohg38"); keep the wording but make it readable.
    print("liftover from " + fr + " to " + to)
    lifted = lf.sumstat_liftover(sums.ss)

    # Every output shares the same on-disk format.
    csv_options = dict(compression='gzip', sep="\t", header=True, index=False)
    if remove_missing:
        # NOTE(review): CHR == 0 presumably marks variants that could not be
        # lifted over — confirm against Liftover.sumstat_liftover.
        lifted[lifted.CHR == 0].to_csv(output_unmapped, **csv_options)
        lifted[lifted.CHR != 0].to_csv(output_mapped, **csv_options)
    else:
        lifted.to_csv(output_path, **csv_options)
def gwas_liftover(input_file,output_path=None,fr='hg19',to='hg38',remove_missing=False):
    """Lift a GWAS summary-statistics file over and derive the output names.

    The file is loaded with ``Sumstat``, its ``ss`` table is passed through
    ``Liftover(fr, to).sumstat_liftover`` and the result is written as
    gzip-compressed, tab-separated text with a header row and no index column.

    Output naming:
        * ``output_path`` is ``None``: files are written next to
          ``input_file`` (or to the current directory when ``input_file``
          has no directory part).
        * ``output_path`` ends with ``/`` or is an existing directory: files
          are written inside that directory.
        * otherwise ``output_path`` is used verbatim as the lifted file.

      In the two directory cases the lifted file is named
      ``<stem>.<to>.sumstats.gz`` where ``<stem>`` is the input base name with
      up to two extensions removed (e.g. ``x.snp_stats.gz`` -> ``x``). The
      unmapped/mapped files reuse the lifted name with ``_unmapped`` /
      ``_mapped`` inserted before the final ``.gz``.

    Args:
        input_file: Path handed to ``Sumstat`` to load the summary statistics.
        output_path: Output directory or output file, see above.
        fr: Source genome build passed to ``Liftover``.
        to: Target genome build passed to ``Liftover``.
        remove_missing: When true, write the rows with ``CHR == 0`` to the
            unmapped file and the remaining rows to the mapped file instead
            of writing a single combined file.

    Returns:
        None. The results are written to disk.
    """
    # Function-scope import so the function does not depend on where the
    # module-level ``import os`` sits relative to this definition.
    import os

    if output_path is None:
        # ``dirname`` is '' for a bare file name; fall back to the current
        # directory instead of producing the filesystem root ('' + '/').
        out_dir = os.path.dirname(input_file) or '.'
    else:
        output_path = os.fspath(output_path)
        is_directory = output_path.endswith('/') or os.path.isdir(output_path)
        out_dir = output_path if is_directory else None

    if out_dir is None:
        lifted_file = output_path
    else:
        stem = os.path.basename(input_file)
        for _ in range(2):
            stem = os.path.splitext(stem)[0]
        lifted_file = os.path.join(out_dir, f'{stem}.{to}.sumstats.gz')

    prefix = lifted_file[:-len('.gz')] if lifted_file.endswith('.gz') else lifted_file
    unmapped_file = prefix + '_unmapped.gz'
    mapped_file = prefix + '_mapped.gz'

    # Honour the requested builds (the original hard-coded hg19 -> hg38).
    lf = Liftover(fr, to)
    print("reading GWAS sumstat")
    # The original read the undefined name ``input_path`` here.
    sums = Sumstat(input_file)
    print("liftover from " + fr + " to " + to)
    lifted = lf.sumstat_liftover(sums.ss)

    # Every output shares the same on-disk format.
    csv_options = dict(compression='gzip', sep="\t", header=True, index=False)
    if remove_missing:
        # NOTE(review): CHR == 0 presumably marks variants that could not be
        # lifted over — confirm against Liftover.sumstat_liftover.
        lifted[lifted.CHR == 0].to_csv(unmapped_file, **csv_options)
        lifted[lifted.CHR != 0].to_csv(mapped_file, **csv_options)
    else:
        lifted.to_csv(lifted_file, **csv_options)
# Output file names for the lifted, unmapped and mapped summary statistics.
# NOTE(review): `cwd` and `_input` are not defined in the visible part of this file and
# `:bnn` is not a built-in string format spec — these lines look like they were pasted
# from a workflow output declaration; confirm whether they are meant to run as plain Python.
# NOTE: the trailing commas on the first two lines make those values 1-tuples, not strings.
sumstats_lifted = f'{cwd}/{_input:bnn}.hg38.sumstats.gz',
sumstats_unmapped = f'{cwd}/{_input:bnn}.hg38.sumstats_unmapped.gz',
sumstats_mapped = f'{cwd}/{_input:bnn}.hg38.sumstats_mapped.gz'
import os
# Scratch checks of the os.path helpers; the splitext and dirname results are not assigned.
# NOTE(review): `input_path` is assigned further down in this file (notebook cell order) —
# confirm it is defined before this cell runs.
tmp = os.path.basename(input_path)
os.path.splitext(tmp)
os.path.dirname(input_path)+'/'
# Load the summary statistics and run the hg19 -> hg38 liftover on the `[:10]` slice of `sums.ss` only.
sums = Sumstat(input_path)
lf = Liftover('hg19','hg38')
lf.sumstat_liftover(sums.ss[:10])
def main(input_path,output_path,remove_missing):
    """Read a REGENIE result file, lift it over and write it as a TSV.

    Args:
        input_path: Path passed to ``read_regenie``.
        output_path: Destination of the tab-separated output (header row,
            no index column).
        remove_missing: When true, rows whose ``CHR`` equals 0 are dropped
            before writing; otherwise every row is written.

    Returns:
        None. The result is written to ``output_path``.

    NOTE(review): ``read_regenie`` and ``sumstat_liftover`` are not defined in
    the visible part of this file — confirm they are in scope.
    """
    sums = read_regenie(input_path)
    lifted = sumstat_liftover(sums)
    if remove_missing:
        # The original kept only the CHR == 0 rows here, i.e. exactly the rows
        # that gwas_liftover writes to its "unmapped" output. Removing the
        # missing variants means keeping the rows with CHR != 0.
        lifted = lifted[lifted.CHR != 0]
    lifted.to_csv(output_path, sep="\t", header=True, index=False)
# Example invocation of main() on a REGENIE summary-statistics file.
input_path = '/home/dmc2245/UKBiobank/results/REGENIE_results/results_imputed_data/2021_10_07_combined_500K/100521_UKBB_Combined_f2247_f2257_expandedwhite_93258cases_237318ctrl_500k_PC1_PC2_f2247_f2257.regenie.snp_stats.gz'
# NOTE(review): the output path is an empty string — confirm the intended destination before running.
output_path = ''
remove_missing = True
main(input_path,output_path,remove_missing)
# NOTE: str.join requires every item to be a str, so this expression raises TypeError because of the int 1.
':'.join([1,'1'])