library(RCurl) library(XML) library(reshape) i <- 0 g <- 1 allpolls <- NULL while(i < 1){ if(length(xmlTreeParse(paste("http://elections.huffingtonpost.com/pollster/api/polls.xml?page=", g, sep=""))$doc$children$polls)<10) i <- 1 newset <- lapply(xmlTreeParse(paste("http://elections.huffingtonpost.com/pollster/api/polls.xml?page=", g, sep=""))$doc$children$polls, function(x) xmlToList(x)) allpolls <- c(allpolls, newset) print(g) g <- g+1 } save(allpolls, file="allpolls.rdata") load("allpolls.rdata") apcnom <- sapply(allpolls, function(x) x$questions["name",]) uniquecontests <- unique(unlist(apcnom)) RvO <- uniquecontests[grepl("Romney vs. Obama", uniquecontests) & grepl("2012", uniquecontests)] relevant <- sapply(apcnom, function(x) sum(RvO %in% x)) anyrel <- allpolls[relevant>0] names(anyrel) <- sapply(anyrel, function(x) paste("poll", x[[1]], sep="")) nm <- unlist(sapply(anyrel, function(x) names(unlist(x)))) nom <- unique(unlist(sapply(anyrel, function(x) names(unlist(x))))) nqs <- nom[!grepl("question", nom)] trNA <- function(x){ out <- try(x) if(is.null(x)) out <- "NA" out } ul <- function(x){ d <- sapply(x, function(q) !is.null(q)) y<-rep(NA, length(x)) y[d] <- unlist(x[d]) y } anyrel2 <- anyrel anyrel2$poll14667b <- anyrel$poll14667 anyrel2$poll14667$questions <- anyrel$poll14667$questions[,1:2] anyrel2$poll14667b$questions <- anyrel$poll14667$questions[,3:4] # All polls in all states for Obama/Romney df1 <- data.frame(id=sapply(anyrel2, function(x) try(x$id)), pollster=sapply(anyrel2, function(x) try(x$pollster)), start_date=sapply(anyrel2, function(x) try(x$start_date)), end_date=sapply(anyrel2, function(x) try(x$end_date)), method=sapply(anyrel2, function(x) try(x$method)), last_updated=sapply(anyrel2, function(x) try(x$last_updated)), house=ul(sapply(anyrel2, function(x) trNA(x$survey_houses$survey_house.name))), name=ul(sapply(anyrel2, function(x) trNA(x$questions[,x$questions["topic",]=="2012-president"]["name"]))), topic=rep("2012-president", length(anyrel2)), state=ul(sapply(anyrel, function(x) trNA(x$questions[,x$questions["topic",]=="2012-president"]["state"]))), subpop=sapply(anyrel, function(x) trNA(x$questions[,x$questions["topic",]=="2012-president"]$subpopulations[,1]$name)), observations=as.numeric(as.character(ul(sapply(anyrel, function(x) try(x$questions[,x$questions["topic",]=="2012-president"]$subpopulations[,1]$observations))))), moe=ul(sapply(anyrel, function(x) try(x$questions[,x$questions["topic",]=="2012-president"]$subpopulations[,1]$margin_of_error))), obama=as.numeric(as.character(ul(sapply(anyrel, function(x) try(x$questions[,x$questions["topic",]=="2012-president"]$subpopulations[,1]$responses[,x$questions[,x$questions["topic",]=="2012-president"]$subpopulations[,1]$responses["choice",]=="Obama"]$value))))), romney=as.numeric(as.character(ul(sapply(anyrel, function(x) try(x$questions[,x$questions["topic",]=="2012-president"]$subpopulations[,1]$responses[,x$questions[,x$questions["topic",]=="2012-president"]$subpopulations[,1]$responses["choice",]=="Romney"]$value))))) ) df1$ProbOb <- df1$obama/(df1$obama+df1$romney) votes2012set <- readHTMLTable("http://www.archives.gov/federal-register/electoral-college/2012/popular-vote.html")[[2]] votes2012 <- data.frame(State=as.character(votes2012set[,1]), Obama=as.numeric(as.character(votes2012set[,2])), Romney=as.numeric(as.character(votes2012set[,3]))) levels(votes2012$State)<-c(levels(votes2012$State), "NY", "US") votes2012$State[33] <- "NY" votes2012$State[52] <- "US" votes2012$ProbOb <- votes2012[,2]/(votes2012[,2]+votes2012[,3]) votes2008set <- readHTMLTable("http://www.archives.gov/federal-register/electoral-college/2008/popular-vote.html")[[2]] votes2008 <- data.frame(State=as.character(votes2008set[,1]), Obama=as.numeric(as.character(votes2008set[,2])), Romney=as.numeric(as.character(votes2008set[,3]))) levels(votes2008$State)<-c(levels(votes2008$State), "US") votes2008$State[52] <- "US" votes2008$ProbOb <- votes2008[,2]/(votes2008[,2]+votes2008[,3]) votes2008$TotalMP <- (votes2008[,2]+votes2008[,3]) votes2004set <- readHTMLTable("http://www.archives.gov/federal-register/electoral-college/2004/popular_vote.html")[[2]] votes2004 <- data.frame(State=as.character(votes2004set[,1]), Bush=as.numeric(gsub(",", "", as.character(votes2004set[,2]))), Kerry=as.numeric(gsub(",", "", as.character(votes2004set[,3])))) levels(votes2004$State)<-c(levels(votes2004$State), "MS", "OH", "US") votes2004$State[25] <- "MS" votes2004$State[36] <- "OH" votes2004$State[52] <- "US" votes2004$ProbDem04 <- votes2004[,3]/(votes2004[,2]+votes2004[,3]) votes2000set <- readHTMLTable("http://www.archives.gov/federal-register/electoral-college/2000/popular_vote.html")[[2]] votes2000 <- data.frame(State=as.character(votes2000set[,1]), Bush1=as.numeric(gsub(",", "", as.character(votes2000set[,2]))), Gore=as.numeric(gsub(",", "", as.character(votes2000set[,3])))) levels(votes2000$State)<-c(levels(votes2000$State), "CA", "US") votes2000$State[5] <- "CA" votes2000$State[52] <- "US" votes2000$ProbDem00 <- votes2000[,3]/(votes2000[,2]+votes2000[,3]) votes1996set <- read.csv("http://joshpasek.com/wp-content/uploads/2014/06/Election1996.csv") votes1996 <- data.frame(State=votes1996set$State, Clinton=votes1996set$Clinton.1, Dole=votes1996set$Clinton.1) votes1996$ProbDem96 <- votes1996[,2]/(votes1996[,2]+votes1996[,3]) statevotes <- list(votes1996, votes2000, votes2004, votes2008) priorelect <- merge_recurse(statevotes, by="State") priorvote <- with(priorelect, data.frame(State=State, Prior=(ProbDem96+ProbDem00+ProbDem04+ProbOb)/4)) linzerset <- read.csv("http://joshpasek.com/wp-content/uploads/2014/06/Linzer-state-forecasts.csv") jackmanset <- read.csv("http://joshpasek.com/wp-content/uploads/2014/06/jackman-2012.csv") addsetsp <- read.csv("AdditionalNationalComparisons.csv") addsets <- addsetsp[rowSums(addsetsp[,-1], na.rm=TRUE)>0,] addsets$Daysout <- as.numeric(as.Date("11/06/2012", format="%m/%d/%y")-as.Date(addsets$Date, format="%m/%d/%y")) addsets$Fivethirtyeightabserr <- addsets$Fivethirtyeight/100-votes2012$ProbOb[votes2012$State=="US"] addsets$Pollyvoteabserr <- addsets$Pollyvote/100-votes2012$ProbOb[votes2012$State=="US"] addsets$RCPabserr <- addsets$RCP/100-votes2012$ProbOb[votes2012$State=="US"] jackmanset$ProbOb <- jackmanset$obama/(jackmanset$obama+jackmanset$romney) jackmanset$Daysout <- as.numeric(as.Date("2012-11-06")-as.Date(jackmanset$date)) linzerset$Daysout <- as.numeric(as.Date("2012-11-06")-as.Date(linzerset$date, format="%m/%d/%Y")) df1$Daysout <- as.numeric(as.Date("2012-11-06"))-as.numeric(as.Date(df1$end_date)) finals <- data.frame(state=votes2012$State, actual=votes2012$ProbOb) numvotes <- data.frame(state=votes2008$State, totvotes=votes2008$TotalMP) df2 <- merge(df1, finals, all.x=TRUE) df2$Error <- df2$ProbOb-df2$actual longlinza <- lapply(2:51, function(x) data.frame(state=colnames(linzerset)[x], ProbOb=linzerset[,x]/100, Daysout=linzerset$Daysout)) longlinz <- merge_recurse(longlinza) jackmanfull <- merge(jackmanset, finals, all.x=TRUE) jackmanfull$Error <- jackmanfull$ProbOb-jackmanfull$actual linzerplus1 <- merge(longlinz, finals, all.x=TRUE) linzerplus2 <- merge(linzerplus1, numvotes, all.x=TRUE) linzerplus1$Error <- linzerplus1$ProbOb-linzerplus1$actual library(weights) linzerUS <- t(sapply(unique(linzerplus2$Daysout), function(d) c(state="US", Daysout=d, ProbOb=wtd.mean(linzerplus2$ProbOb[linzerplus2$Daysout==d], linzerplus2$totvotes[linzerplus2$Daysout==d]), actual=finals$actual[52], Error=wtd.mean(linzerplus2$ProbOb[linzerplus2$Daysout==d], linzerplus2$totvotes[linzerplus2$Daysout==d])-finals$actual[52]))) linzerplus <- as.data.frame(rbind(linzerplus1, linzerUS)) for(i in 2:length(linzerplus)) linzerplus[,i] <- as.numeric(as.character(linzerplus[,i])) plot(df2$Error~log(df2$Daysout)) lines(linzerplus$Error~log(linzerplus$Daysout), col="red", type="p") ## Ohio, Virginia, Florida as Examples stateplot <- function(statename, ylim=c(0,8), xlim=c(0, 75), main=paste("Predicting Election Results From", statename)){ statepolls <- df2[df2$state==statename,] natpolls <- df2[df2$state=="US",] statehistory <- priorvote[priorvote$State==statename,2] stateseparate <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=mean(abs(statepolls$Error[(d)statepolls$Daysout]), na.rm=TRUE))))) staterolling <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=mean(statepolls$Error[(d)statepolls$Daysout], na.rm=TRUE))))) staterollingphone <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=mean(statepolls$Error[(d)statepolls$Daysout & statepolls$method=="Phone"], na.rm=TRUE))))) if(statename=="US"){ forecasting <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=(.1968+.0408*.34+.59*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)-statepolls$actual[1]))))) } desartrolling <- as.data.frame(t(sapply(1:40, function(d) c(Daysout=d, AvgErr=(-.13906+.364*statehistory+.75*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)+.163*mean(natpolls$ProbOb[(d)natpolls$Daysout], na.rm=TRUE)-statepolls$actual[1]))))) linzerstate <- linzerplus[linzerplus$state==statename,] jackmanstate <- jackmanfull[jackmanfull$state==statename,] plot(abs(100*statepolls$Error[statepolls$Daysout<75])~statepolls$Daysout[statepolls$Daysout<75], xlab="Days From Election", ylab="Absolute Error", ylim=ylim, xlim=xlim, main=main) lines(abs(100*stateseparate$AvgErr)~stateseparate$Daysout, lty=2, lwd=3) lines(abs(100*staterolling$AvgErr)~staterolling$Daysout, lwd=3) lines(abs(100*desartrolling$AvgErr)~desartrolling$Daysout, col="purple", lwd=3) lines(abs(100*linzerstate$Error)~linzerstate$Daysout, col="red", lwd=3) lines(abs(100*jackmanstate$Error[order(jackmanstate$Daysout)])~jackmanstate$Daysout[order(jackmanstate$Daysout)], col="blue", lwd=3) if(statename=="US"){ lines(abs(100*forecasting$AvgErr)~forecasting$Daysout, col="dark green", lwd=3) lines(abs(100*addsets$Fivethirtyeightabserr)~addsets$Daysout, col="orange", lwd=3) lines(abs(100*addsets$Pollyvoteabserr)~addsets$Daysout, col="pink", lwd=3) lines(abs(100*addsets$RCPabserr)~addsets$Daysout, col="brown", lwd=3) } } stateplotnonabs <- function(statename, ylim=c(-8,8), xlim=c(0, 75), main=paste("Predicting Election Results From", statename)){ statepolls <- df2[df2$state==statename,] natpolls <- df2[df2$state=="US",] statehistory <- priorvote[priorvote$State==statename,2] stateseparate <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=mean(abs(statepolls$Error[(d)statepolls$Daysout]), na.rm=TRUE))))) staterolling <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=mean(statepolls$Error[(d)statepolls$Daysout], na.rm=TRUE))))) staterollingphone <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=mean(statepolls$Error[(d)statepolls$Daysout & statepolls$method=="Phone"], na.rm=TRUE))))) if(statename=="US"){ forecasting <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=(.1968+.0408*.34+.59*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)-statepolls$actual[1]))))) } desartrolling <- as.data.frame(t(sapply(1:40, function(d) c(Daysout=d, AvgErr=(-.13906+.364*statehistory+.75*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)+.163*mean(natpolls$ProbOb[(d)natpolls$Daysout], na.rm=TRUE)-statepolls$actual[1]))))) linzerstate <- linzerplus[linzerplus$state==statename,] jackmanstate <- jackmanfull[jackmanfull$state==statename,] plot(-(100*statepolls$Error[statepolls$Daysout<75])~statepolls$Daysout[statepolls$Daysout<75], xlab="Days From Election", ylab="Actual Obama Share - Predicted Obama Share", ylim=ylim, xlim=xlim, main=main) lines((100*stateseparate$AvgErr)~stateseparate$Daysout, lty=2, lwd=3) lines(-(100*staterolling$AvgErr)~staterolling$Daysout, lwd=3) lines(-(100*desartrolling$AvgErr)~desartrolling$Daysout, col="purple", lwd=3) lines(-(100*linzerstate$Error)~linzerstate$Daysout, col="red", lwd=3) lines(-(100*jackmanstate$Error[order(jackmanstate$Daysout)])~jackmanstate$Daysout[order(jackmanstate$Daysout)], col="blue", lwd=3) if(statename=="US"){ lines(-(100*forecasting$AvgErr)~forecasting$Daysout, col="dark green", lwd=3) lines(-(100*addsets$Fivethirtyeightabserr)~addsets$Daysout, col="orange", lwd=3) lines(-(100*addsets$Pollyvoteabserr)~addsets$Daysout, col="pink", lwd=3) lines(-(100*addsets$RCPabserr)~addsets$Daysout, col="brown", lwd=3) } abline(h=0, lty=3, col="dark gray") } #stateplot("OH", ylim=c(0,5)) #par(mfrow=c(2,1)) pdf("NatAccuracyChecks.pdf", width=8.5, height=6) par(mfrow=c(1,1)) sapply(rev(names(sort(table(df2$state))))[1], stateplot) legend(x="topright", legend=c("Average Survey Error", "Aggregated Survey Error", "RealClearPolitics", "Erikson & Wlezien", "DeSart & Holbrook", "Jackman (Pollster)", "Graefe et al. (PollyVote)", "Silver (FiveThirtyEight)", "Linzer (Votamatic)"), col=c("black", "black", "brown", "dark green", "purple", "blue", "pink", "orange", "red"), lwd=2, lty=c(2,1,1,1,1,1,1,1,1)) dev.off() pdf("NatAccuracyChecksNoAbs.pdf", width=8.5, height=8) par(mfrow=c(1,1)) sapply(rev(names(sort(table(df2$state))))[1], stateplotnonabs) legend(x="bottomright", legend=c("Absolute Average Survey Error", "Aggregated Survey Error", "RealClearPolitics", "Erikson & Wlezien", "DeSart & Holbrook", "Jackman (Pollster)", "Graefe et al. (PollyVote)", "Silver (FiveThirtyEight)", "Linzer (Votamatic)"), col=c("black", "black", "brown", "dark green", "purple", "blue", "pink", "orange", "red"), lwd=2, lty=c(2,1,1,1,1,1,1,1,1)) dev.off() pdf("AccuracyChecks.pdf", width=8.5, height=11) par(mfrow=c(4,2)) sapply(rev(names(sort(table(df2$state))))[2:length(table(df2$state))], stateplot) dev.off() pdf("AccuracyChecksNoAbs.pdf", width=8.5, height=11) par(mfrow=c(4,2)) sapply(rev(names(sort(table(df2$state))))[2:length(table(df2$state))], stateplotnonabs) dev.off() matrix(c(unique(as.character(linzerplus$state)), unique(as.character(jackmanfull$state)), "", "", "", "", unique(as.character(df2$state)), "", "", "", "", ""), nrow=3, ncol=51, byrow=TRUE) ## Within State MSEs ## msestate <- function(statename, range=7){ statepolls <- df2[df2$state==statename,] natpolls <- df2[df2$state=="US",] statehistory <- priorvote[priorvote$State==statename,2] stateseparate <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=mean(abs(statepolls$Error[(d)statepolls$Daysout]), na.rm=TRUE))))) staterolling <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=mean(statepolls$Error[(d)statepolls$Daysout], na.rm=TRUE))))) staterollingphone <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=mean(statepolls$Error[(d)statepolls$Daysout & statepolls$method=="Phone"], na.rm=TRUE))))) linzerstate <- linzerplus[linzerplus$state==statename,] jackmanstate <- jackmanfull[jackmanfull$state==statename,] forecasting <- rcp <- pv <- ft8 <- data.frame(avgerr=rep(NA, 75), AvgErr=rep(NA, 75), Daysout=1:75) if(statename=="US"){ forecasting <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=(.1968+.0408*.34+.59*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)-statepolls$actual[1]))))) rcp <- with(addsets, data.frame(avgerr=RCPabserr, Daysout=Daysout)) pv <- with(addsets, data.frame(avgerr=Pollyvoteabserr, Daysout=Daysout)) ft8 <- with(addsets, data.frame(avgerr=Fivethirtyeightabserr, Daysout=Daysout)) } else forecasting <- data.frame(Daysout=1:75, AvgErr=rep(NA, 75)) desartrolling <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, AvgErr=(-.13906+.364*statehistory+.75*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)+.163*mean(natpolls$ProbOb[(d)natpolls$Daysout], na.rm=TRUE)-statepolls$actual[1]))))) statemse <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, SurveyMSE=eval(mean((statepolls$Error[(d)statepolls$Daysout])^2, na.rm=TRUE)), SurveyAvgMSE=eval(mean(statepolls$Error[(d)statepolls$Daysout], na.rm=TRUE)^2), RCP=min(eval((rcp$avgerr[rcp$Daysout==d])^2), "NA"), Forecasting=min(eval((forecasting$AvgErr[forecasting$Daysout==d])^2), "NA"), Desart=eval(min(desartrolling$AvgErr[desartrolling$Daysout==d]^2, "NA")), Jackman=min(eval((jackmanstate$Error[jackmanstate$Daysout==d])^2), "NA"), Pollyvote=min(eval((pv$avgerr[pv$Daysout==d])^2), "NA"), Linzer=min(eval((linzerstate$Error[linzerstate$Daysout==d])^2), "NA"), Fivethirtyeight=min(eval((ft8$avgerr[ft8$Daysout==d])^2), "NA"))))) npolls <- sapply(1:75, function(d) length(statepolls$Error[(d)statepolls$Daysout])) for(i in length(statemse)) statemse[,i] <- as.numeric(as.character(statemse[,i])) statemse } ohcompare <- msestate("OH") allcompare <- lapply(rev(names(sort(table(df2$state)))), function(x) msestate(x)) normstate <- sapply(allcompare, function(x) sapply(x, function(g) mean(as.numeric(as.character(g)), na.rm=TRUE))) colnames(normstate) <- names(allcompare) <- rev(names(sort(table(df2$state)))) nat <- allcompare[[1]] acm <- merge_recurse(allcompare[2:length(allcompare)]) acmf <- merge_recurse(allcompare) finalcompare <- acmf[acmf$Daysout==1,] propdif <- function(x, y){ x <- as.numeric(as.character(x)) y <- as.numeric(as.character(y)) comparative <- mean(x[!is.na(x) & !is.na(y)], na.rm=TRUE)/mean(y[!is.na(x) & !is.na(y)], na.rm=TRUE) comparative } sumdif <- function(x, y){ x <- as.numeric(as.character(x)) y <- as.numeric(as.character(y)) comparative <- sum(x15 d2 <- Daysout>18 d3 <- Daysout>23 d4 <- Daysout>31 d5 <- Daysout>62 d6 <- Daysout>68 100*coef(lm(pollavg~d1+d2+d3+d4+d5+d6))[-1] } regstate <- function(statename, range=7){ statepolls <- df2[df2$state==statename,] natpolls <- df2[df2$state=="US",] statehistory <- priorvote[priorvote$State==statename,2] stateseparatecoef <- try(coefset(statepolls$ProbOb[!is.na(statepolls$ProbOb) & statepolls$Daysout<76], statepolls$Daysout[!is.na(statepolls$ProbOb) & statepolls$Daysout<76])) if(class(stateseparatecoef)=="try-error") stateseparatecoef <- rep(NA, 6) staterolling <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, Est=mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE))))) staterollingcoef <- try(coefset(staterolling$Est, staterolling$Daysout)) if(class(staterollingcoef)=="try-error") staterollingcoef <- rep(NA, 6) desartrolling <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, Est=(-.13906+.364*statehistory+.75*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)+.163*mean(natpolls$ProbOb[(d)natpolls$Daysout], na.rm=TRUE)))))) desartrollingcoef <- try(coefset(desartrolling$Est, desartrolling$Daysout)) if(class(desartrollingcoef)=="try-error") desartrollingcoef <- rep(NA, 6) linzerstate <- linzerplus[linzerplus$state==statename,] linzerstatecoef <- try(coefset(linzerstate$ProbOb[linzerstate$Daysout<76], linzerstate$Daysout[linzerstate$Daysout<76])) if(class(linzerstatecoef)=="try-error") linzerstatecoef <- rep(NA, 6) jackmanstate <- jackmanfull[jackmanfull$state==statename,] jackmanstatecoef <- try(coefset(jackmanstate$ProbOb[jackmanstate$Daysout<76], jackmanstate$Daysout[jackmanstate$Daysout<76])) if(class(jackmanstatecoef)=="try-error") jackmanstatecoef <- rep(NA, 6) forecastingcoef <- rcpcoef <- pvcoef <- ft8coef <- rep(NA, 6) if(statename=="US"){ forecasting <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, Est=(.1968+.0408*.34+.59*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)))))) forecastingcoef <- coefset(forecasting$Est, forecasting$Daysout) rcp <- with(addsets, data.frame(Est=RCP/100, Daysout=Daysout)) rcpcoef <- coefset(rcp$Est[rcp$Daysout<76], rcp$Daysout[rcp$Daysout<76]) pv <- with(addsets, data.frame(Est=Pollyvote/100, Daysout=Daysout)) pvcoef <- coefset(pv$Est[pv$Daysout<76], pv$Daysout[pv$Daysout<76]) ft8 <- with(addsets, data.frame(Est=Fivethirtyeight/100, Daysout=Daysout)) ft8coef <- coefset(ft8$Est[ft8$Daysout<76], ft8$Daysout[ft8$Daysout<76]) } ls <- data.frame(stateseparatecoef, staterollingcoef, rcpcoef, forecastingcoef, desartrollingcoef, jackmanstatecoef, pvcoef, linzerstatecoef, ft8coef) ls } for(i in rev(names(sort(table(df2$state))))){ print(i) regstate(i) } allcomparecoefs <- lapply(rev(names(sort(table(df2$state)))), function(x) regstate(x)) accs <- abs(merge_recurse(allcomparecoefs)) coefsumdifset <- t(sapply(accs, function(x) sapply(accs, function(y) sumdif(x, y)))) coeflt <- t(apply(coefsumdifset, 1, function(x) c(separate=x[1], aggregation=sum(x[c(2:3)]), predictive=sum(x[4:5]), hybrid=sum(x[c(6:9)]), track=sum(x[c(1:3, 6)]), forecast=sum(x[c(4:5, 7:9)]), total=sum(x[1:9])))) coefgt <- t(apply(coefsumdifset, 2, function(x) c(separate=x[1], aggregation=sum(x[c(2:3)]), predictive=sum(x[4:5]), hybrid=sum(x[c(6:9)]), track=sum(x[c(1:3, 6)]), forecast=sum(x[c(4:5, 7:9)]), total=sum(x[1:9])))) coeftot <- coefgt+coeflt write.csv(coeftot, "Tables/coefNs.csv") 100*coefgt/(coefgt+coeflt) write.csv(100*coefgt/(coefgt+coeflt), "Tables/coefPropMoreVariable.csv") coefminp <- 2*(1-pbinom(coeflt, (coefgt+coeflt), .5)) coefmaxp <- 2*(1-pbinom(coefgt, (coefgt+coeflt), .5)) coeffinp <- coefminp coeffinp[coefmaxpstatepolls$Daysout], na.rm=TRUE))))) desartrolling <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, Est=(-.13906+.364*statehistory+.75*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)+.163*mean(natpolls$ProbOb[(d)natpolls$Daysout], na.rm=TRUE)))))) linzerstate <- linzerplus[linzerplus$state==statename,] linzerstate$Est <- linzerstate$ProbOb jackmanstate <- jackmanfull[jackmanfull$state==statename,] jackmanstate$Est <- jackmanstate$ProbOb rcp <- pv <- ft8 <- forecasting <- data.frame(Daysout=1:75, Est=rep(NA, 75)) if(statename=="US"){ forecasting <- as.data.frame(t(sapply(1:75, function(d) c(Daysout=d, Est=(.1968+.0408*.34+.59*mean(statepolls$ProbOb[(d)statepolls$Daysout], na.rm=TRUE)))))) rcp <- with(addsets, data.frame(Est=RCP/100, Daysout=Daysout)) pv <- with(addsets, data.frame(Est=Pollyvote/100, Daysout=Daysout)) ft8 <- with(addsets, data.frame(Est=Fivethirtyeight/100, Daysout=Daysout)) } ls <- list(stateseparate, staterolling, rcp, forecasting, desartrolling, jackmanstate, pv, linzerstate, ft8) names(ls) <- c("Separate", "RollingAvg", "RCP", "Erikson", "Desart", "Jackman", "Pollyvote", "Linzer", "FiveThirtyEight") ls } combosetgt <- function(ls){ sapply(ls, function(x) sapply(ls, function(y) as.numeric(sd(x$Est[x$Daysout %in% unique(y$Daysout)], na.rm=TRUE)>sd(y$Est[y$Daysout %in% unique(x$Daysout)], na.rm=TRUE)))) } combosetgtwtd <- function(ls){ sapply(ls, function(x) sapply(ls, function(y) as.numeric(wtd.var(x$Est[x$Daysout %in% unique(y$Daysout)], 1/table(x$Daysout)[x$Daysout[x$Daysout %in% unique(y$Daysout)]], normwt=TRUE, na.rm=TRUE)>wtd.var(y$Est[y$Daysout %in% unique(x$Daysout)], 1/table(y$Daysout)[y$Daysout[y$Daysout %in% unique(x$Daysout)]], normwt=TRUE, na.rm=TRUE)))) } combosetlt <- function(ls){ sapply(ls, function(x) sapply(ls, function(y) as.numeric(sd(x$Est[x$Daysout %in% unique(y$Daysout)], na.rm=TRUE)