## A simple R script for loading and processing the ArrayExress ## data set E-MEXP-886 (5 Wt vs 5 Atxn1 Ko mice) ## ## Data at http://www.ebi.ac.uk/arrayexpress/experiments/E-MEXP-886 ## E-MEXP-886.raw.1.zip (celfiles) ## E-MEXP-886.sdrf.txt (sample description) ## ## In practice, use package ArrayExpress for reading directly from EBI ## ## Alexander.Ploner@ki.se 2010-10-18 ## Celfiles (unzipped) and sample description should be here WORKDIR = "~/bitsCourse/Data/Atxn1/" ## Read all celfiles in the current working directory setwd(WORKDIR) library(affy) rawdata = ReadAffy() ## Names of samples sampleNames(rawdata) ## From the sample description, these are the genotypes in the same order genotype = c("ko", "wt", "ko", "ko", "ko", "wt", "wt", "wt", "wt","ko") phenodata = data.frame(genotype) rownames(phenodata) = sampleNames(rawdata) colnames(phenodata) = "Genotype" ## Store with expression data pData(rawdata) = phenodata ## Calculate normalized RMA expression values rmaexp = rma(rawdata) ## Specify the genotype grouping for Limma design = model.matrix( ~ Genotype, data=pData(rmaexp)) ## Fit the gene-wise models with limma library(limma) fit = lmFit(rmaexp, design) efit = eBayes(fit) ## Extract the top 10 regulated genes topGenes = topTable(efit, coef=2) ## Build an annotated table of top genes library(annaffy) ## This selects for basic gene and pathway information annCols = aaf.handler()[c(1:3, 7:9, 12)] ## Get the IDs of the top genes ## CHECK ME: correct package for annotation? annotation(rmaexp) annTab = aafTableAnn(topGenes$ID, "moe430a.db", annCols) ## Get statistics annTab2 = aafTable(logFC=round(topGenes$logFC, 2), tstat=round(topGenes$t, 1), Pval=signif(topGenes$P.Value, 1), FDR=signif(topGenes$adj.P.Val, 2)) ## Merge statistics and annotation annTab = merge(annTab2, annTab) ## Write to HTML file with nice name and title saveHTML(annTab, "Genelist Wt-Ko.html", title = "Ataxin1: Wt-Ko")