This tutorial focuses on computing sparse linkage disequilibrium (LD) matrices for 1000 Genomes Project (1000G) data across different ancestries and on exploring the resulting LD data. These sparse LD matrices are essential for various genomic analyses, including LD score regression, VEGAS gene-based testing, and Bayesian linear regression models used for fine-mapping or genomic scoring.
# Load libraries
library(qgg)
library(gact)
library(data.table)
# Load the GAlist object
GAlist <- readRDS(file = "C:/Users/gact/hsa.0.0.1/GAlist_hsa.0.0.1.rds")

# Files read and written in this section; all live in the GAlist marker directory
marker_dir <- GAlist$dirs["marker"]
glist_file <- file.path(marker_dir, "Glist_1000G_eur_filtered.rds")
markers_file <- file.path(marker_dir, "markers_1000G_eur_filtered.txt.gz")

# Load the Glist with information about the 1000G genotypes (EUR file)
Glist <- readRDS(file = glist_file)

# Marker IDs used in the sparse LD computation: every rsid stored in the Glist
# (disabled alternative: rsids <- GAlist$rsids)
rsids <- unlist(Glist$rsids)

# Compute the sparse LD matrix and LD scores for EUR and save the updated
# Glist back to the file it was loaded from
Glist <- gprep(
  Glist,
  task = "sparseld",
  msize = 1000,
  rsids = rsids,
  overwrite = FALSE
)
saveRDS(Glist, file = glist_file)

# Delete the files listed in Glist$ldfiles.
# NOTE(review): the Glist saved just above still carries these paths, and the
# files are presumably the LD files written by gprep() -- confirm that removing
# them here is intended.
file.remove(Glist$ldfiles)

# Flatten the per-marker entries of the Glist into one table, one row per marker
markers <- data.frame(
  rsids = unlist(Glist$rsids),
  chr = unlist(Glist$chr),
  pos = unlist(Glist$pos),
  ea = unlist(Glist$a1),
  nea = unlist(Glist$a2),
  eaf = unlist(Glist$af),
  maf = unlist(Glist$maf),
  map = unlist(Glist$map),
  ldscores = unlist(Glist$ldscores)
)

# Index the rows by rsid.
# NOTE(review): fwrite() is called without row.names = TRUE below, so the row
# names presumably only affect the in-memory table -- confirm.
rownames(markers) <- markers$rsids

# Write the EUR marker table to the marker directory
fwrite(markers, file = markers_file)
# Load libraries
library(qgg)
library(gact)
library(data.table)
# Load the GAlist object
GAlist <- readRDS(file = "C:/Users/gact/hsa.0.0.1/GAlist_hsa.0.0.1.rds")

# Files read and written in this section; all live in the GAlist marker directory
marker_dir <- GAlist$dirs["marker"]
glist_file <- file.path(marker_dir, "Glist_1000G_eas_filtered.rds")
markers_file <- file.path(marker_dir, "markers_1000G_eas_filtered.txt.gz")

# Load the Glist with information about the 1000G genotypes (EAS file)
Glist <- readRDS(file = glist_file)

# Marker IDs used in the sparse LD computation: every rsid stored in the Glist
# (disabled alternative: rsids <- GAlist$rsids)
rsids <- unlist(Glist$rsids)

# Compute the sparse LD matrix and LD scores for EAS and save the updated
# Glist back to the file it was loaded from
Glist <- gprep(
  Glist,
  task = "sparseld",
  msize = 1000,
  rsids = rsids,
  overwrite = FALSE
)
saveRDS(Glist, file = glist_file)

# Delete the files listed in Glist$ldfiles.
# NOTE(review): the Glist saved just above still carries these paths, and the
# files are presumably the LD files written by gprep() -- confirm that removing
# them here is intended.
file.remove(Glist$ldfiles)

# Flatten the per-marker entries of the Glist into one table, one row per marker
markers <- data.frame(
  rsids = unlist(Glist$rsids),
  chr = unlist(Glist$chr),
  pos = unlist(Glist$pos),
  ea = unlist(Glist$a1),
  nea = unlist(Glist$a2),
  eaf = unlist(Glist$af),
  maf = unlist(Glist$maf),
  map = unlist(Glist$map),
  ldscores = unlist(Glist$ldscores)
)

# Index the rows by rsid.
# NOTE(review): fwrite() is called without row.names = TRUE below, so the row
# names presumably only affect the in-memory table -- confirm.
rownames(markers) <- markers$rsids

# Write the EAS marker table to the marker directory
fwrite(markers, file = markers_file)
# Load libraries
library(qgg)
library(gact)
library(data.table)
# Load the GAlist object
GAlist <- readRDS(file = "C:/Users/gact/hsa.0.0.1/GAlist_hsa.0.0.1.rds")

# Files read and written in this section; all live in the GAlist marker directory
marker_dir <- GAlist$dirs["marker"]
glist_file <- file.path(marker_dir, "Glist_1000G_sas_filtered.rds")
markers_file <- file.path(marker_dir, "markers_1000G_sas_filtered.txt.gz")

# Load the Glist with information about the 1000G genotypes (SAS file)
Glist <- readRDS(file = glist_file)

# Marker IDs used in the sparse LD computation: every rsid stored in the Glist
# (disabled alternative: rsids <- GAlist$rsids)
rsids <- unlist(Glist$rsids)

# Compute the sparse LD matrix and LD scores for SAS and save the updated
# Glist back to the file it was loaded from
Glist <- gprep(
  Glist,
  task = "sparseld",
  msize = 1000,
  rsids = rsids,
  overwrite = FALSE
)
saveRDS(Glist, file = glist_file)

# Delete the files listed in Glist$ldfiles.
# NOTE(review): the Glist saved just above still carries these paths, and the
# files are presumably the LD files written by gprep() -- confirm that removing
# them here is intended.
file.remove(Glist$ldfiles)

# Flatten the per-marker entries of the Glist into one table, one row per marker
markers <- data.frame(
  rsids = unlist(Glist$rsids),
  chr = unlist(Glist$chr),
  pos = unlist(Glist$pos),
  ea = unlist(Glist$a1),
  nea = unlist(Glist$a2),
  eaf = unlist(Glist$af),
  maf = unlist(Glist$maf),
  map = unlist(Glist$map),
  ldscores = unlist(Glist$ldscores)
)

# Index the rows by rsid.
# NOTE(review): fwrite() is called without row.names = TRUE below, so the row
# names presumably only affect the in-memory table -- confirm.
rownames(markers) <- markers$rsids

# Write the SAS marker table to the marker directory
fwrite(markers, file = markers_file)