
# Two example numeric series of 20 values each; either can be passed
# straight to smoothLocal().
alberto <- c(
	32, 20, 5, 23, 27, 32, 21, 20, 12, 1,
	1, 21, 22, 23, 31, 17, 34, 15, 26, 38
)
jorge <- c(
	38, 13, 7, 2, 32, 21, 70, 65, 27, 31,
	30, 33, 28, 40, 31, 25, 38, 36, 31, 40
)

# Plots a numeric series against its index (points joined by lines) and
# overlays its lowess() smooth.
#
# data:  numeric vector to plot; must contain at least one value.
# color: colour used for the smooth line (default 2).
#
# Called for its plotting side effect; returns NULL invisibly.
smoothLocal <- function(data, color=2){
	# Fail early with a clear message: on empty input 1:length(data) would be
	# c(1, 0) and plot() would complain about mismatched x/y lengths instead.
	if(length(data) == 0){
		stop("data must contain at least one value", call. = FALSE)
	}

	plot(seq_along(data), data, type="o", ylab="y", xlab="x")

	lines(lowess(data), col=color)

	invisible(NULL)
}


# Reads a tab-delimited file with column headings and, for every column,
# draws the raw series (points joined by lines) together with its lowess()
# smooth.
#
# fileName:  path handed to read.delim().
# plotOnTop: if FALSE (default) a new plot is started, titled with the name
#            of the first column; if TRUE everything is added to the plot
#            that is already open on the current graphics device.
#
# The smooth of column i is drawn in colour i + 1.
# Called for its plotting side effect; returns NULL invisibly.
smooth <- function(fileName, plotOnTop=FALSE){

	allData <- read.delim(fileName)
	columnNames <- names(allData)

	# When starting a fresh plot, size the y-axis for all numeric columns.
	# Otherwise the axis is fixed by the first column alone and any later
	# column outside that range is silently clipped. For a single column this
	# equals plot()'s own default.
	yRange <- NULL
	if(!plotOnTop){
		numericColumns <- Filter(is.numeric, allData)
		if(length(numericColumns) > 0){
			yRange <- range(unlist(numericColumns, use.names=FALSE), finite=TRUE)
		}
	}

	for(i in seq_along(allData)) {
		aColumn <- allData[[i]]
		positions <- seq_along(aColumn)

		if(i == 1 && !plotOnTop){
			plot(positions, aColumn, type="o", ylab="y", xlab="x",
				main=columnNames[i], ylim=yRange)
		}
		else{
			# lines() only adds to an existing plot; axis labels and titles
			# are not graphical parameters and would just raise warnings.
			lines(positions, aColumn, type="o")
		}

		lines(lowess(aColumn), col=i+1)
	}

	invisible(NULL)
}

########################################################################################################

# Plots the first column of a tab-delimited file as points and overlays the
# minimum (colour 3) and maximum (colour 4) of a centred moving window.
#
# fileName:   path handed to read.delim(); only the first column is used.
# windowSize: nominal window width. The window around point i reaches
#             windowSize %/% 2 points to either side and is truncated at the
#             ends of the series, so an even windowSize covers
#             windowSize + 1 points.
#
# Called for its plotting side effect; returns NULL invisibly.
minMax <- function(fileName, windowSize=5){
	allData <- read.delim(fileName)

	aColumn <- allData[[1]]
	n <- length(aColumn)
	positions <- seq_len(n)

	# Title with the plotted column's name only; names(allData) would put
	# every column heading of the file into the title.
	plot(positions, aColumn, type="p", ylab="y", xlab="x", main=names(allData)[1])

	# Window bounds for every position, clamped to the valid index range
	# 1..n (the half-width does not depend on the position).
	halfWidth <- windowSize %/% 2
	lows <- pmax(positions - halfWidth, 1)
	highs <- pmin(positions + halfWidth, n)

	mins <- vapply(positions, function(i) min(aColumn[lows[i]:highs[i]]), numeric(1))
	maxes <- vapply(positions, function(i) max(aColumn[lows[i]:highs[i]]), numeric(1))

	lines(positions, mins, type="l", col=3)
	lines(positions, maxes, type="l", col=4)

	invisible(NULL)
}

########################################################################################################
# Absolute differences between consecutive elements of a numeric vector.
#
# values: numeric vector.
#
# Returns a vector one element shorter than `values`; the result is empty
# when `values` has fewer than two elements.
getDistances <- function(values){
	# diff() copes with short inputs, whereas looping over 2:length(values)
	# counts backwards (2, 1, ...) when fewer than two values are supplied.
	# unname() keeps the result free of names, as the element-wise loop did.
	abs(diff(unname(values)))
}


# Monte Carlo (permutation) test of whether one of two individuals is more
# variable than the other.
#
# filename:     tab-delimited file with column headings, read via
#               read.delim(); columns 1 and 2 hold the SLD data (sequential
#               counts of some language structure usage) of the two
#               individuals.
# numResamples: number of random reshuffles (default 5000).
#
# Variability is measured as the mean absolute step between consecutive
# observations (see getDistances()). The test statistic is the absolute
# difference of those two means.
#
# Returns the p-value: the share of reshuffles whose absolute difference of
# means is at least as large as the observed one. If it is below, say, .05
# the null hypothesis that both series are equally variable can be rejected.
isMoreVariable <- function(filename, numResamples=5000){

	allData <- read.delim(filename)

	distances1 <- getDistances(allData[[1]])
	distances2 <- getDistances(allData[[2]])

	if(length(distances1) == 0 || length(distances2) == 0){
		stop("each series needs at least two observations", call. = FALSE)
	}

	# Observed test criterion
	testCriterion <- abs(mean(distances1) - mean(distances2))

	# Pool all distances; every resample reassigns them at random to two
	# groups of the original sizes.
	allNums <- c(distances1, distances2)
	firstGroup <- seq_along(distances1)
	secondGroup <- (length(distances1) + 1):length(allNums)

	asExtreme <- vapply(
		seq_len(numResamples),
		function(i){
			newSample <- sample(allNums)
			# The criterion is an absolute difference, so the resampled
			# difference must be compared in absolute terms as well.
			abs(mean(newSample[firstGroup]) - mean(newSample[secondGroup])) >= testCriterion
		},
		logical(1)
	)

	pVal <- sum(asExtreme) / numResamples
	pVal
}

########################################################################################################
# Trailing moving average of a numeric vector.
#
# values:     numeric vector.
# windowSize: number of points averaged per position (default 2); a single
#             number >= 1.
#
# Position i holds the mean of values[(i - windowSize + 1):i]. The first
# windowSize - 1 positions have no complete trailing window, so they are
# filled with the average of the first full window, values[1:windowSize].
#
# Returns a numeric vector as long as `values` (empty for empty input).
# If `values` is shorter than windowSize every window reaches past the end
# of the data and the result is all NA.
getMovingAverage <- function(values, windowSize=2){
	stopifnot(length(windowSize) == 1, windowSize >= 1)

	# seq_along() gives an empty result for empty input; vapply() builds the
	# result in one go instead of growing it element by element.
	vapply(
		seq_along(values),
		function(i){
			# Clamp so the leading positions reuse the first full window.
			first <- max(i - windowSize + 1, 1)
			sum(values[first:(first + windowSize - 1)]) / windowSize
		},
		numeric(1)
	)
}



# Calculates the maximum positive distance between points that lie
# 1 through (maxStep - 1) positions apart.
#
# values:  numeric vector without missing values.
# maxStep: one more than the largest separation examined (default 6).
#
# For every separation d in 1..(maxStep - 1) the rises values[i] - values[i - d]
# are examined; the largest one is returned, or 0 if the series never rises
# (or is too short to contain a pair of points).
getMaxPositiveDistance <- function(values, maxStep=6){

	if(anyNA(values)){
		stop("values must not contain missing values", call. = FALSE)
	}

	# Only separations shorter than the series yield any pair of points;
	# larger ones would index past the end of `values`.
	largestLag <- max(0, min(maxStep - 1, length(values) - 1))

	maxPosDist <- 0
	for(lag in seq_len(largestLag)){
		# diff(values, lag) is values[i] - values[i - lag] for every valid i
		maxPosDist <- max(maxPosDist, diff(values, lag=lag))
	}

	maxPosDist
}



# Monte Carlo test for significant peaks in a subject's series.
#
# filename:     tab-delimited file with column headings, read via
#               read.delim(); only the first column is analysed.
# numResamples: number of resamples drawn with replacement (default 5000).
#
# The statistic is getMaxPositiveDistance() applied to the moving average
# (getMovingAverage()) of the series. Returns the p-value: the share of
# resamples whose statistic is at least as large as that of the original
# series. If it is below, say, .05 the null hypothesis that the "peaks" in
# the original data occurred by chance can be rejected.
hasSignificantPeaks <- function(filename, numResamples=5000){

	series <- read.delim(filename)[[1]]

	# Statistic of the observed series
	observed <- getMaxPositiveDistance(getMovingAverage(series))

	# One resample (with replacement) per iteration; TRUE when its statistic
	# reaches the observed one.
	reachesObserved <- vapply(
		1:numResamples,
		function(draw){
			resampled <- sample(series, replace=TRUE)
			getMaxPositiveDistance(getMovingAverage(resampled)) >= observed
		},
		logical(1)
	)

	sum(reachesObserved) / numResamples
}









##############################################
#setwd("/Users/macw/Desktop/BrianClass/")
#list.files()
#source("dynamicSLD.R")
#ls()
#smooth("jorge.txt")
#smoothLocal(alberto)
#minMax("jorge.txt")
#isMoreVariable("sampleData.txt")
#hasSignificantPeaks("alberto.txt")


