# Helpers for plotting and Monte Carlo testing of sequential count data.
#
# Every function that takes a file name reads it with read.delim(), i.e. it
# expects a tab-delimited text file with a header row of column names.

# Example series, presumably the same kind of sequential counts the
# file-based functions read (see the usage examples at the bottom).
alberto <- c(32, 20, 5, 23, 27, 32, 21, 20, 12, 1,
             1, 21, 22, 23, 31, 17, 34, 15, 26, 38)
jorge <- c(38, 13, 7, 2, 32, 21, 70, 65, 27, 31,
           30, 33, 28, 40, 31, 25, 38, 36, 31, 40)

# Plots a numeric vector against its index and overlays a lowess smoother.
#   data:  numeric vector to plot
#   color: colour of the lowess line (default 2)
# Returns NULL invisibly; called for its plotting side effect.
smoothLocal <- function(data, color = 2) {
  plot(seq_along(data), data, type = "o", ylab = "y", xlab = "x")
  lines(lowess(data), col = color)
  invisible(NULL)
}

# Takes a tab-delimited file with column headings and plots smoothing
# functions for each column.
#   fileName:  path to the tab-delimited file
#   plotOnTop: if TRUE, the first column is drawn onto the already open plot
#              instead of starting a new one
# Returns NULL invisibly; called for its plotting side effect.
smooth <- function(fileName, plotOnTop = FALSE) {
  allData <- read.delim(fileName)
  for (i in seq_along(allData)) {
    aColumn <- allData[[i]]
    xs <- seq_along(aColumn)
    if (i == 1 && !plotOnTop) {
      # Only the first column opens a new plot; it also supplies the title.
      plot(xs, aColumn, type = "o", ylab = "y", xlab = "x",
           main = names(allData)[i])
    } else {
      # lines() takes no axis labels or title, so none are passed here.
      lines(xs, aColumn, type = "o")
    }
    # Each column gets its own smoother colour (2, 3, 4, ...).
    lines(lowess(aColumn), col = i + 1)
  }
  invisible(NULL)
}

########################################################################

# Plots the first column of a tab-delimited file as points, together with
# the minimum (colour 3) and maximum (colour 4) of a moving window centred
# on each point.
#   fileName:   path to the tab-delimited file
#   windowSize: total window width; windowSize %/% 2 points are taken on
#               each side of the centre, truncated at the ends of the series
# Returns NULL invisibly; called for its plotting side effect.
minMax <- function(fileName, windowSize = 5) {
  allData <- read.delim(fileName)
  aColumn <- allData[[1]]
  n <- length(aColumn)
  xs <- seq_len(n)
  # Only the first column is plotted, so only its name goes in the title.
  plot(xs, aColumn, type = "p", ylab = "y", xlab = "x",
       main = names(allData)[1])

  # Get the mins and maxes of a moving window.
  halfWidth <- windowSize %/% 2
  mins <- numeric(n)
  maxes <- numeric(n)
  for (i in xs) {
    low <- max(i - halfWidth, 1)
    high <- min(i + halfWidth, n)
    window <- aColumn[low:high]
    mins[i] <- min(window)
    maxes[i] <- max(window)
  }
  lines(xs, mins, type = "l", col = 3)
  lines(xs, maxes, type = "l", col = 4)
  invisible(NULL)
}

########################################################################

# Returns the absolute differences between consecutive elements of `values`
# (length(values) - 1 numbers; an empty vector for fewer than two values).
getDistances <- function(values) {
  abs(diff(values))
}

# Expects filename to have SLD data (sequential counts of some language
# structure usage) from two individuals, one per column.
# Runs a Monte Carlo analysis to determine if one is significantly more
# variable than the other.
#   filename:     path to the tab-delimited file (first two columns are used)
#   numResamples: number of random reshuffles
# Returns the p-value: the proportion of reshuffles whose absolute difference
# of mean distances is at least as large as the observed one.
isMoreVariable <- function(filename, numResamples = 5000) {
  allData <- read.delim(filename)
  distances1 <- getDistances(allData[[1]])
  distances2 <- getDistances(allData[[2]])

  # First compute the testing criterion.
  testCriterion <- abs(mean(distances1) - mean(distances2))

  # Now resample and compare samples against testCriterion.
  allNums <- c(distances1, distances2)
  total <- length(allNums)
  # Both columns come from one data frame, so the two groups are equal in
  # size and this is exactly half of the pooled distances.
  firstSize <- length(distances1)
  sameOrGreater <- 0
  for (i in seq_len(numResamples)) {
    # Shuffle all distances.
    newSample <- sample(allNums)
    # Split the shuffled distances into two halves and take each mean.
    mean1 <- mean(newSample[seq_len(firstSize)])
    mean2 <- mean(newSample[(firstSize + 1):total])
    # Count the times this difference of means is greater than or equal to
    # the original one. The criterion is an absolute difference, so the
    # resampled difference must be absolute too.
    if (abs(mean1 - mean2) >= testCriterion) {
      sameOrGreater <- sameOrGreater + 1
    }
  }

  # Probability that randomly split distances differ as much as or more than
  # the original two (p-value). If less than, say, .05, we can reject the
  # null hypothesis and assume the two datasets come from different
  # distributions.
  sameOrGreater / numResamples
}

########################################################################

# NOTE(review): the source text between the start of getMovingAverage's loop
# and the final comparison in getMaxPositiveDistance was lost. The two
# function bodies below are reconstructions from the surviving fragments and
# the function names -- confirm against the original script.

# Returns a trailing moving average of `values`: element i is the mean of the
# last `windowSize` values ending at i (fewer at the start of the series).
getMovingAverage <- function(values, windowSize = 2) {
  stopifnot(windowSize >= 1)
  vapply(seq_along(values), function(i) {
    mean(values[max(1, i - windowSize + 1):i])
  }, numeric(1))
}

# Returns the largest rise from any point to any later point in `values`,
# or 0 when the series never rises.
getMaxPositiveDistance <- function(values) {
  # values - cummin(values) is the rise from the lowest earlier point.
  max(0, values - cummin(values))
}

# Runs a Monte Carlo analysis to determine if the subject contains
# significant peaks.
#   filename:     path to the tab-delimited file (first column is used)
#   numResamples: number of bootstrap resamples
# Returns the p-value: the proportion of resamples whose maximum positive
# distance is at least that of the original data.
hasSignificantPeaks <- function(filename, numResamples = 5000) {
  allData <- read.delim(filename)
  distro <- allData[[1]]
  criterion <- getMaxPositiveDistance(getMovingAverage(distro))

  resampledPeaks <- vapply(seq_len(numResamples), function(i) {
    # Resample with replacement.
    getMaxPositiveDistance(getMovingAverage(sample(distro, replace = TRUE)))
  }, numeric(1))
  numHigher <- sum(resampledPeaks >= criterion)

  # If the p-value is less than, say, .05, we can reject the null hypothesis
  # that "peaks" in the original data occurred by chance.
  numHigher / numResamples
}

########################################################################
# Example usage:
# setwd("/Users/macw/Desktop/BrianClass/")
# list.files()
# source("dynamicSLD.R")
# ls()
# smooth("jorge.txt")
# smoothLocal(alberto)
# minMax("jorge.txt")
# isMoreVariable("sampleData.txt")
# hasSignificantPeaks("alberto.txt")