## A simple R script for loading and processing the ArrayExress 
## data set E-MEXP-886 (5 Wt vs 5 Atxn1 Ko mice)
##
## Data at http://www.ebi.ac.uk/arrayexpress/experiments/E-MEXP-886
##      E-MEXP-886.raw.1.zip    (celfiles)
##      E-MEXP-886.sdrf.txt     (sample description)
##
## In practice, use package ArrayExpress for reading directly from EBI
##
## Alexander.Ploner@ki.se 2010-10-18

## Celfiles (unzipped) and sample description should be here
WORKDIR = "~/bitsCourse/Data/Atxn1/"

## Read all celfiles in the current working directory
setwd(WORKDIR)
library(affy)
rawdata = ReadAffy()
## Names of samples
sampleNames(rawdata)

## From the sample description, these are the genotypes in the same order
genotype = c("ko", "wt", "ko", "ko", "ko", "wt", "wt", "wt", "wt","ko")
phenodata = data.frame(genotype)
rownames(phenodata) = sampleNames(rawdata)
colnames(phenodata) = "Genotype"
## Store with expression data
pData(rawdata) = phenodata

## Calculate normalized RMA expression values
rmaexp = rma(rawdata)

## Specify the genotype grouping for Limma
design = model.matrix( ~ Genotype, data=pData(rmaexp))

## Fit the gene-wise models with limma
library(limma)
fit  = lmFit(rmaexp, design)
efit = eBayes(fit)

## Extract the top 10 regulated genes
topGenes = topTable(efit, coef=2)

## Build an annotated table of top genes
library(annaffy)
## This selects for basic gene and pathway information
annCols = aaf.handler()[c(1:3, 7:9, 12)]
## Get the IDs of the top genes
## CHECK ME: correct package for annotation? annotation(rmaexp)
annTab  = aafTableAnn(topGenes$ID, "moe430a.db", annCols)
## Get statistics
annTab2  = aafTable(logFC=round(topGenes$logFC, 2), tstat=round(topGenes$t, 1), Pval=signif(topGenes$P.Value, 1), FDR=signif(topGenes$adj.P.Val, 2))
## Merge statistics and annotation
annTab = merge(annTab2, annTab)
## Write to HTML file with nice name and title
saveHTML(annTab, "Genelist Wt-Ko.html", title = "Ataxin1: Wt-Ko")