This tutorial details the process of preparing the Glist for 1000 Genomes (1000G) data. It encompasses a series of steps, including loading essential libraries, downloading data, processing genotype data for various ancestries, and saving the results for future use.
# Load libraries
library(qgg)
library(gact)
library(data.table)
# Read the previously saved GAlist object from disk
GAlist <- readRDS(file = "C:/Users/gact/hsa.0.0.1/GAlist_hsa.0.0.1.rds")
# Fetch the 1000G data (if not already downloaded); the GAlist returned by
# downloadDB() replaces the one loaded above
GAlist <- downloadDB(GAlist = GAlist, what = "1000G")
# ---- 1000G EUR ----
# Input: original bed/bim/fam files for the EUR samples
bedfiles <- file.path(GAlist$dirs["marker"], "g1000_eur.bed")
bimfiles <- file.path(GAlist$dirs["marker"], "g1000_eur.bim")
famfiles <- file.path(GAlist$dirs["marker"], "g1000_eur.fam")

# Output: filtered bed/bim/fam files, written next to the originals
bedfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eur_filtered.bed")
bimfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eur_filtered.bim")
famfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eur_filtered.fam")

# Marker IDs held in the database; passed to writeBED() as the filter
rsids <- GAlist$rsids

# Read the original files and write the filtered copies
writeBED(
  bedRead = bedfiles,
  bimRead = bimfiles,
  famRead = famfiles,
  bedWrite = bedfiles_filtered,
  bimWrite = bimfiles_filtered,
  famWrite = famfiles_filtered,
  rsids = rsids
)

# Build the summary object (Glist) from the filtered genotype files
Glist <- gprep(
  study = "1000G EUR",
  bedfiles = bedfiles_filtered,
  bimfiles = bimfiles_filtered,
  famfiles = famfiles_filtered
)

# Store the Glist in the marker directory for later use
saveRDS(
  Glist,
  file = file.path(GAlist$dirs["marker"], "Glist_1000G_eur_filtered.rds")
)
# ---- 1000G EAS ----
# Marker IDs in database; passed to writeBED() as the filter
rsids <- GAlist$rsids
# Define the file paths for the original bed/bim/fam files to read
bedfiles <- file.path(GAlist$dirs["marker"], "g1000_eas.bed")
bimfiles <- file.path(GAlist$dirs["marker"], "g1000_eas.bim")
famfiles <- file.path(GAlist$dirs["marker"], "g1000_eas.fam")
# Define the file paths for the filtered bed/bim/fam files to write
bedfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eas_filtered.bed")
bimfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eas_filtered.bim")
famfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_eas_filtered.fam")
# Call the writeBED function to filter and write the data
writeBED(
  bedRead = bedfiles,
  bimRead = bimfiles,
  famRead = famfiles,
  bedWrite = bedfiles_filtered,
  bimWrite = bimfiles_filtered,
  famWrite = famfiles_filtered,
  rsids = rsids
)
# Prepare summary (i.e. Glist) for 1000G genotype data.
# gprep() must be given the filtered files written just above, i.e. the
# *files_filtered path variables defined in this section.
Glist <- gprep(
  study = "1000G EAS",
  bedfiles = bedfiles_filtered,
  bimfiles = bimfiles_filtered,
  famfiles = famfiles_filtered
)
# Save Glist for use later
saveRDS(
  Glist,
  file = file.path(GAlist$dirs["marker"], "Glist_1000G_eas_filtered.rds")
)
# ---- 1000G SAS ----
# Marker IDs in database; passed to writeBED() as the filter
rsids <- GAlist$rsids
# Define the file paths for the original bed/bim/fam files to read
bedfiles <- file.path(GAlist$dirs["marker"], "g1000_sas.bed")
bimfiles <- file.path(GAlist$dirs["marker"], "g1000_sas.bim")
famfiles <- file.path(GAlist$dirs["marker"], "g1000_sas.fam")
# Define the file paths for the filtered bed/bim/fam files to write
bedfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_sas_filtered.bed")
bimfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_sas_filtered.bim")
famfiles_filtered <- file.path(GAlist$dirs["marker"], "g1000_sas_filtered.fam")
# Call the writeBED function to filter and write the data
writeBED(
  bedRead = bedfiles,
  bimRead = bimfiles,
  famRead = famfiles,
  bedWrite = bedfiles_filtered,
  bimWrite = bimfiles_filtered,
  famWrite = famfiles_filtered,
  rsids = rsids
)
# Prepare summary (i.e. Glist) for 1000G genotype data.
# gprep() must be given the filtered files written just above, i.e. the
# *files_filtered path variables defined in this section.
Glist <- gprep(
  study = "1000G SAS",
  bedfiles = bedfiles_filtered,
  bimfiles = bimfiles_filtered,
  famfiles = famfiles_filtered
)
# Save Glist for use later
saveRDS(
  Glist,
  file = file.path(GAlist$dirs["marker"], "Glist_1000G_sas_filtered.rds")
)