Introduction

This tutorial details the process of preparing the Glist (the summary of the genotype data created by `gprep`) for 1000 Genomes (1000G) data. It encompasses a series of steps, including loading essential libraries, downloading data, processing genotype data for various ancestries, and saving the results for future use.


Download 1000G data for different ancestries

# Load libraries
library(qgg)
library(gact)
library(data.table)

# Read the previously saved GAlist object from disk
GAlist <- readRDS("C:/Users/gact/hsa.0.0.1/GAlist_hsa.0.0.1.rds")

# Download the 1000G data (if not already downloaded); the GAlist returned
# by downloadDB() replaces the one loaded above
GAlist <- downloadDB(GAlist = GAlist, what = "1000G")


Prepare Glist for 1000G data for European ancestry (EUR)

# Marker IDs taken from the GAlist; handed to writeBED() as `rsids` below
rsids <- GAlist$rsids

# Input: the original EUR bed/bim/fam files, located in the directory
# given by GAlist$dirs["marker"]
bedfiles <- file.path(GAlist$dirs["marker"], "g1000_eur.bed")
bimfiles <- file.path(GAlist$dirs["marker"], "g1000_eur.bim")
famfiles <- file.path(GAlist$dirs["marker"], "g1000_eur.fam")

# Output: the filtered EUR bed/bim/fam files, written to the same directory
bedfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eur_filtered.bed")
bimfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eur_filtered.bim")
famfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eur_filtered.fam")

# Filter the original files with writeBED() and write the filtered copies;
# each input file is listed next to the output file it produces
writeBED(
  bedRead = bedfiles, bedWrite = bedfiles_filtered,
  bimRead = bimfiles, bimWrite = bimfiles_filtered,
  famRead = famfiles, famWrite = famfiles_filtered,
  rsids = rsids
)

# Build the summary object (i.e. Glist) from the filtered 1000G EUR files
Glist <- gprep(
  study = "1000G EUR",
  bedfiles = bedfiles_filtered,
  bimfiles = bimfiles_filtered,
  famfiles = famfiles_filtered
)

# Store the Glist next to the genotype files so it can be reloaded later
saveRDS(
  Glist,
  file = file.path(GAlist$dirs["marker"], "Glist_1000G_eur_filtered.rds")
)


Prepare Glist for 1000G data for East Asian ancestry (EAS)

# Prepare and save the Glist for the 1000G East Asian (EAS) genotype data:
# filter the original bed/bim/fam files with writeBED(), summarise the
# filtered files with gprep(), and save the result as an .rds file.

# Marker IDs in database; passed to writeBED() as `rsids`
rsids <- GAlist$rsids

# Define the file paths for the original bed/bim/fam files to read
bedfiles <- file.path(GAlist$dirs["marker"], "g1000_eas.bed")
bimfiles <- file.path(GAlist$dirs["marker"], "g1000_eas.bim")
famfiles <- file.path(GAlist$dirs["marker"], "g1000_eas.fam")

# Define the file paths for the filtered bed/bim/fam files to write
bedfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eas_filtered.bed")
bimfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eas_filtered.bim")
famfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eas_filtered.fam")

# Call the writeBED function to filter and write the data
writeBED(bedRead = bedfiles,
         bimRead = bimfiles,
         famRead = famfiles,
         bedWrite = bedfiles_filtered,
         bimWrite = bimfiles_filtered,
         famWrite = famfiles_filtered,
         rsids = rsids)

# Prepare summary (i.e. Glist) for 1000G genotype data.
# The inputs are the filtered files written by writeBED() above, so the
# `*files_filtered` path variables defined in this section must be used.
Glist <- gprep(study = "1000G EAS",
               bedfiles = bedfiles_filtered,
               bimfiles = bimfiles_filtered,
               famfiles = famfiles_filtered)

# Save Glist for use later
saveRDS(Glist,
        file = file.path(GAlist$dirs["marker"], "Glist_1000G_eas_filtered.rds"))


Prepare Glist for 1000G data for South Asian ancestry (SAS)

# Prepare and save the Glist for the 1000G South Asian (SAS) genotype data:
# filter the original bed/bim/fam files with writeBED(), summarise the
# filtered files with gprep(), and save the result as an .rds file.

# Marker IDs in database; passed to writeBED() as `rsids`
rsids <- GAlist$rsids

# Define the file paths for the original bed/bim/fam files to read
bedfiles <- file.path(GAlist$dirs["marker"], "g1000_sas.bed")
bimfiles <- file.path(GAlist$dirs["marker"], "g1000_sas.bim")
famfiles <- file.path(GAlist$dirs["marker"], "g1000_sas.fam")

# Define the file paths for the filtered bed/bim/fam files to write
bedfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_sas_filtered.bed")
bimfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_sas_filtered.bim")
famfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_sas_filtered.fam")

# Call the writeBED function to filter and write the data
writeBED(bedRead = bedfiles,
         bimRead = bimfiles,
         famRead = famfiles,
         bedWrite = bedfiles_filtered,
         bimWrite = bimfiles_filtered,
         famWrite = famfiles_filtered,
         rsids = rsids)

# Prepare summary (i.e. Glist) for 1000G genotype data.
# The inputs are the filtered files written by writeBED() above, so the
# `*files_filtered` path variables defined in this section must be used.
Glist <- gprep(study = "1000G SAS",
               bedfiles = bedfiles_filtered,
               bimfiles = bimfiles_filtered,
               famfiles = famfiles_filtered)

# Save Glist for use later
saveRDS(Glist,
        file = file.path(GAlist$dirs["marker"], "Glist_1000G_sas_filtered.rds"))