#----------------
# SSANOVA-R-Code-NWAVdemo-rev.txt
#----------------
# author: Alicia Wassink (wassink@uw.edu)
# credits: This script cribs heavily from Josef Fruehwald's excellent tutorial,
# "SS ANOVA.pdf" (downloaded 3/22/2010)
# relevant publication: LVC-2013
#----------------

##This demo code was prepared for NWAV-2013 vowel trajectory workshop 
##It will compare (ae) BAG vowel in different following contexts, to test the
##hypothesis that following velars, particularly (ŋ) condition raising of the vowel.

##First, we clear memory and load the two libraries the script depends on.
##rm() empties the global workspace so that objects left over from an earlier
##session cannot mask the data columns that are attached further down.
rm(list=ls())

##Install each package only when it is not already available. The package name
##must be given as a quoted string: a bare name is evaluated as a variable and
##fails with "object not found".
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")   # package for plotting
}
if (!requireNamespace("gss", quietly = TRUE)) {
  install.packages("gss")       # containing the SSANOVA algorithm to be used
}

library(ggplot2)
library(gss)

##Now, we read in one gender x generation datafile (choose the one called PNWE-SSANOVA-spreadsheet.txt)
##Formant data were extracted from each vowel at 20-50-80%. This enables presentation of common proportions
##through each vowel.
##file.choose() opens an interactive file picker; the file is read as tab-delimited UTF-16 text.
aedata.all <- read.delim(file = file.choose(), header = TRUE, fileEncoding = "UTF-16", dec = ".")

##choose one of the below comparisons by removing the # at the start of the line
##(and put a # in front of the comparison you are no longer using)
aedata <- subset(aedata.all, Vowel %in% c("æ"))
#aedata<-subset(aedata.all, Vowel %in% c("ɛ","æ"))		
#aedata<-subset(aedata.all, Vowel %in% c("æ", "ej"))	
#aedata<-subset(aedata.all, Vowel %in% c("ɛ", "ej"))

##subset() keeps factor levels that no longer occur in the selected rows;
##drop them so they do not carry over into the plots and models.
##(No effect on columns that were read in as plain character strings.)
aedata <- droplevels(aedata)

# For simplicity, the comparison you chose will always be called "aedata" below.
##Remove any copy of aedata attached by a previous run, so that re-running the
##script does not pile up stale duplicates on the search path.
while ("aedata" %in% search()) {
  detach("aedata", character.only = TRUE)
}
##attach() makes the columns of aedata reachable by bare name (e.g. Interval).
attach(aedata)


##create a "layered" plot using our measured interval data: one line per Group
##for each of the two formant columns, coloured by Following.Phone.
##NOTE(review): this plot reads columns F.1 and F.2, whereas the SS-ANOVA models
##in this script are fit to F1 and F2 -- confirm that both pairs of columns
##exist in the spreadsheet, or align the names.
formants <- ggplot(aedata, aes(x = Interval, group = Group, colour = Following.Phone)) +
  geom_line(aes(y = F.1), alpha = 0.8) +
  geom_line(aes(y = F.2), alpha = 0.8) +
  ylab("Hz")

##print() explicitly: a bare object name is only auto-printed at the interactive
##prompt, so without it no figure is drawn when the script is run via source().
print(formants)


##Fit one smoothing-spline ANOVA model per formant. Each model contains a main
##effect of Following.Phone, a main effect of Interval, and their interaction.
##NOTE(review): the response columns here are F1/F2, while the raw-data plot in
##this script uses F.1/F.2 -- confirm which column names the spreadsheet has.
f1.model <- ssanova(
  F1 ~ Following.Phone + Interval + Following.Phone:Interval,
  data = aedata
)
f2.model <- ssanova(
  F2 ~ Following.Phone + Interval + Following.Phone:Interval,
  data = aedata
)

##create a new data frame of dummy data: 100 evenly spaced Interval values
##spanning the observed range of Interval, crossed with each Following.Phone
##group to be compared (100 rows per group)
#note: +/- 5% bound on seq(). Going outside generates 'inputs are out of bounds' error
##Interval is read from aedata directly, so this step does not rely on attach();
##na.rm = TRUE keeps a missing Interval value from turning the range into NA.
grid <- expand.grid(
  Interval = seq(
    min(aedata$Interval, na.rm = TRUE),
    max(aedata$Interval, na.rm = TRUE),
    length.out = 100
  ),
  Following.Phone = c("o", "ŋ")
)


##use SS-ANOVA model to predict formant values and standard errors for the new array
##Each model is evaluated once; the fitted values ($fit) and their standard
##errors ($se.fit) are both taken from that single result.
f1.pred <- predict(f1.model, newdata = grid, se.fit = TRUE)
f2.pred <- predict(f2.model, newdata = grid, se.fit = TRUE)

grid$F1.Fit <- f1.pred$fit
grid$F1.SE <- f1.pred$se.fit
grid$F2.Fit <- f2.pred$fit
grid$F2.SE <- f2.pred$se.fit

##Plot the fitted SS-ANOVA curves for F1 and F2 (thick solid lines), one colour
##per Following.Phone group, each with dashed lines at fit +/- 1.96 standard
##errors (95% confidence intervals).
formant.comparison <- ggplot(grid, aes(x = Interval, colour = Following.Phone)) +
  # fitted curves
  geom_line(aes(y = F1.Fit), lwd = 2, alpha = 0.8) +
  geom_line(aes(y = F2.Fit), lwd = 2, alpha = 0.8) +
  # upper and lower bounds for F1
  geom_line(aes(y = F1.Fit + (1.96 * F1.SE)), lty = 2) +
  geom_line(aes(y = F1.Fit - (1.96 * F1.SE)), lty = 2) +
  # upper and lower bounds for F2
  geom_line(aes(y = F2.Fit + (1.96 * F2.SE)), lty = 2) +
  geom_line(aes(y = F2.Fit - (1.96 * F2.SE)), lty = 2) +
  ylab("Hz")

print(formant.comparison)

#print the model summaries (R^2 for tests of significance)
#print() is called explicitly so the summaries also appear when the script is
#run via source(), where bare expressions are not auto-printed.
print(summary(f1.model))
print(summary(f2.model))