Introduction

This tutorial shows how to compute sparse linkage disequilibrium (LD) matrices for 1000 Genomes Project (1000G) data across different ancestries and how to explore the resulting LD data. These sparse LD matrices are essential for various genomic analyses, including LD score regression, VEGAS gene-based testing, and Bayesian linear regression models used for fine-mapping or genomic scoring.


Compute Sparse LD matrix and LD scores for EUR

# Load libraries
library(qgg)
library(gact)
library(data.table)

# Read the GAlist object; its `dirs` entry supplies the "marker" directory
# that every path below is built from
GAlist <- readRDS("C:/Users/gact/hsa.0.0.1/GAlist_hsa.0.0.1.rds")
marker_dir <- GAlist$dirs["marker"]

# Read the EUR Glist (information about the 1000G genotypes). The same file
# is overwritten further down once gprep() has returned the updated Glist.
glist_path <- file.path(marker_dir, "Glist_1000G_eur_filtered.rds")
Glist <- readRDS(glist_path)

# Marker IDs passed to the sparse LD computation: all IDs stored in the Glist
# (alternative source kept from the original script: rsids <- GAlist$rsids)
rsids <- unlist(Glist$rsids)

# Compute the sparse LD matrix and LD scores for EUR, then store the updated
# Glist for later use
Glist <- gprep(
  Glist,
  task = "sparseld",
  msize = 1000,
  rsids = rsids,
  overwrite = FALSE
)
saveRDS(object = Glist, file = glist_path)

# Delete the files listed in Glist$ldfiles.
# NOTE(review): the Glist saved above still lists these paths in `ldfiles`;
# confirm nothing downstream needs the files themselves.
file.remove(Glist$ldfiles)

# Flatten the per-marker fields of the Glist into one table, one row per
# marker (ea/nea/eaf are filled from the Glist fields a1/a2/af)
markers <- data.frame(
  rsids = unlist(Glist$rsids),
  chr = unlist(Glist$chr),
  pos = unlist(Glist$pos),
  ea = unlist(Glist$a1),
  nea = unlist(Glist$a2),
  eaf = unlist(Glist$af),
  maf = unlist(Glist$maf),
  map = unlist(Glist$map),
  ldscores = unlist(Glist$ldscores)
)

# Index the in-memory table by marker ID, then write it to the marker
# directory
row.names(markers) <- markers$rsids
fwrite(
  markers,
  file = file.path(marker_dir, "markers_1000G_eur_filtered.txt.gz")
)


Compute Sparse LD matrix and LD scores for EAS

# Load libraries
library(qgg)
library(gact)
library(data.table)

# Read the GAlist object; its `dirs` entry supplies the "marker" directory
# that every path below is built from
GAlist <- readRDS("C:/Users/gact/hsa.0.0.1/GAlist_hsa.0.0.1.rds")
marker_dir <- GAlist$dirs["marker"]

# Read the EAS Glist (information about the 1000G genotypes). The same file
# is overwritten further down once gprep() has returned the updated Glist.
glist_path <- file.path(marker_dir, "Glist_1000G_eas_filtered.rds")
Glist <- readRDS(glist_path)

# Marker IDs passed to the sparse LD computation: all IDs stored in the Glist
# (alternative source kept from the original script: rsids <- GAlist$rsids)
rsids <- unlist(Glist$rsids)

# Compute the sparse LD matrix and LD scores for EAS, then store the updated
# Glist for later use
Glist <- gprep(
  Glist,
  task = "sparseld",
  msize = 1000,
  rsids = rsids,
  overwrite = FALSE
)
saveRDS(object = Glist, file = glist_path)

# Delete the files listed in Glist$ldfiles.
# NOTE(review): the Glist saved above still lists these paths in `ldfiles`;
# confirm nothing downstream needs the files themselves.
file.remove(Glist$ldfiles)

# Flatten the per-marker fields of the Glist into one table, one row per
# marker (ea/nea/eaf are filled from the Glist fields a1/a2/af)
markers <- data.frame(
  rsids = unlist(Glist$rsids),
  chr = unlist(Glist$chr),
  pos = unlist(Glist$pos),
  ea = unlist(Glist$a1),
  nea = unlist(Glist$a2),
  eaf = unlist(Glist$af),
  maf = unlist(Glist$maf),
  map = unlist(Glist$map),
  ldscores = unlist(Glist$ldscores)
)

# Index the in-memory table by marker ID, then write it to the marker
# directory
row.names(markers) <- markers$rsids
fwrite(
  markers,
  file = file.path(marker_dir, "markers_1000G_eas_filtered.txt.gz")
)


Compute Sparse LD matrix and LD scores for SAS

# Load libraries
library(qgg)
library(gact)
library(data.table)

# Read the GAlist object; its `dirs` entry supplies the "marker" directory
# that every path below is built from
GAlist <- readRDS("C:/Users/gact/hsa.0.0.1/GAlist_hsa.0.0.1.rds")
marker_dir <- GAlist$dirs["marker"]

# Read the SAS Glist (information about the 1000G genotypes). The same file
# is overwritten further down once gprep() has returned the updated Glist.
glist_path <- file.path(marker_dir, "Glist_1000G_sas_filtered.rds")
Glist <- readRDS(glist_path)

# Marker IDs passed to the sparse LD computation: all IDs stored in the Glist
# (alternative source kept from the original script: rsids <- GAlist$rsids)
rsids <- unlist(Glist$rsids)

# Compute the sparse LD matrix and LD scores for SAS, then store the updated
# Glist for later use
Glist <- gprep(
  Glist,
  task = "sparseld",
  msize = 1000,
  rsids = rsids,
  overwrite = FALSE
)
saveRDS(object = Glist, file = glist_path)

# Delete the files listed in Glist$ldfiles.
# NOTE(review): the Glist saved above still lists these paths in `ldfiles`;
# confirm nothing downstream needs the files themselves.
file.remove(Glist$ldfiles)

# Flatten the per-marker fields of the Glist into one table, one row per
# marker (ea/nea/eaf are filled from the Glist fields a1/a2/af)
markers <- data.frame(
  rsids = unlist(Glist$rsids),
  chr = unlist(Glist$chr),
  pos = unlist(Glist$pos),
  ea = unlist(Glist$a1),
  nea = unlist(Glist$a2),
  eaf = unlist(Glist$af),
  maf = unlist(Glist$maf),
  map = unlist(Glist$map),
  ldscores = unlist(Glist$ldscores)
)

# Index the in-memory table by marker ID, then write it to the marker
# directory
row.names(markers) <- markers$rsids
fwrite(
  markers,
  file = file.path(marker_dir, "markers_1000G_sas_filtered.txt.gz")
)