# load the quantreg library library(quantreg) # read the weekly gas price data - 8/20/90 to 4/02/07 price<-read.table(file="C:/Documents and Settings/Andrew Buck/My Documents/Econ 616/gasprices.csv", header = TRUE, sep = ",", dec = ".", na.strings = "NA",check.names = TRUE) # put the price data in lower case x x <- price[,2] # what is the length of the series n <- length(x) # set up the distributed lags in upper case X. p <- 5 X <- cbind(x[(p - 1):(n - 1)], x[(p - 2):(n - 2)], x[(p - 3):(n - 3)], x[(p - 4):(n - 4)]) # put current price in y y <- x[p:n] # Part 1. ____________________________________________________________________________________ # Plot this weeks price against last weeks price plot(X[,1],y,col="blue",cex=0.25,xlab="Last Week's Price",ylab="This Week's Price") # Put in the some quantiles abline(rq(y ~ X[,1], tau = 0.1), col = "green") abline(rq(y ~ X[,1], tau = 0.5), col = "green") abline(rq(y ~ X[,1], tau = 0.9), col = "green") # Part 2. ____________________________________________________________________________________ # Let us construct conditional quantiles and density plots for the gas price data z <- rq(y ~ X[,1], tau = -1) # First find the 25th percentile and 75th percentile of prices gas.lo <- quantile(X[,1], 0.25) gas.hi <- quantile(X[,1], 0.75) # put the primal solution to the LP problem in ps ps <- z$sol[1, ] # construct fitted values at the 25th and 75th percentile. Note the notation for matrix multiplication qs.lo <- c(c(1, gas.lo) %*% z$sol[4:5, ]) qs.hi <- c(c(1, gas.hi) %*% z$sol[4:5, ]) # the next line declares mfrow as a matrix with one row and two columns so that we can do side by side graphs par(mfrow = c(1, 2)) # plot the quantiles. The first plot command sets up the plot frame and labels. The next two do the plots. plot(c(ps, ps), c(qs.lo, qs.hi), type = "n", xlab = expression(tau), ylab = "quantile") plot(stepfun(ps, c(qs.lo[1], qs.lo)), do.points = FALSE,add = TRUE,col.hor="blue",col.vert="green") plot(stepfun(ps, c(qs.hi[1], qs.hi)), do.points = FALSE,add = TRUE, col.hor = "red", col.vert = "blue") ps.wts <- (c(0, diff(ps)) + c(diff(ps), 0))/2 # in the next two lines construct the kernel densities for low and high prices ap <- akj(qs.lo, z = qs.lo, p = ps.wts) ar <- akj(qs.hi, z = qs.hi, p = ps.wts) # set up the second box for a graph and then draw the two lines. plot(c(qs.lo, qs.hi), c(ap$dens, ar$dens),type = "n", xlab = "Gas Price", ylab = "Density") lines(qs.lo, ar$dens, col = "red") lines(qs.hi, ap$dens, col = "black") legend("topright", c("Lo P", "Hi P"), lty = c(1,1), col = c("red", "black")) # Part 3. ____________________________________________________________________________________ # hypothesis testing # 3.A Use the summary command to present t-statistics summary(z) # 3.B Example of plotting of coefficients and their confidence bands plot(summary(rq(y~X[,1]+X[,2]+X[,3]+X[,4],tau = 1:49/50))) # 3.C do the estimated quantile regression relationships conform to the location shift # hypothesis that assumes that all of the conditional quantile functions have the same # slope parameters. Test equality of the slope across all quantiles fit1 <- rq(y ~ X[,1], tau = 0.25) fit2 <- rq(y ~ X[,1], tau = 0.5) fit3 <- rq(y ~ X[,1], tau = 0.75) anova(fit1,fit2,fit3) # 3.D Choose the best specification from two nested models fit0 <- rq(y~X[,1]+X[,2]+X[,3]+X[,4],tau=0.25) anova(fit0,fit1) # Now do the QR on all four lags and a set of quantiles taus <- c(.05,.1,.25,.75,.9,.95) qreg<-rq(y~X[,1]+X[,2]+X[,3]+X[,4],tau=taus) # get a summary of the results summary(qreg) #put the residuals for each quantile in r1 r1<-resid(qreg) #put the coefficients for each quantile in c1 c1<-coef(qreg) # 3.E Do Khmaladze on the complete tau grid and then on a not quite so fine grid T1 <- KhmaladzeTest(y ~ X, taus = -1, nullH = "location") T2 <- KhmaladzeTest(y ~ X, taus = 10:290/300,nullH = "location", se = "ker") T3 <- KhmaladzeTest(y ~ X, taus = -1, nullH = "location-scale") T4 <- KhmaladzeTest(y ~ X, taus = 10:290/300,nullH = "location-scale", se = "ker") # you will note that the results for T1 and T2 differ slightly. # Part 4. ___________________________________________________________________________ # non-linear quantile regression # Create some data that has the shape of a logistic n <- 200 df <- 8 delta <- 8 set.seed(4003) x <- sort(rt(n, df)) u <- runif(n) v <- -log(1 - (1 - exp(-delta))/(1 + exp(-delta * pt(x, df)) * ((1/u) - 1)))/delta y <- qt(v, df) # plot the true quartiles "lprq" <- function(x, y, h, m = 50, tau = 0.5) { xx <- seq(min(x), max(x), length = m) fv <- xx dv <- xx for (i in 1:length(xx)) { z <- x - xx[i] wx <- dnorm(z/h) r <- rq(y ~ z, weights = wx, tau = tau, ci = FALSE) }} plot(x, y, col = "blue", cex = 0.25) us <- c(0.25, 0.5, 0.75) for (i in 1:length(us)) { u <- us[i] v <- -log(1 - (1 - exp(-delta))/(1 + exp(-delta * pt(x, df)) * ((1/u) - 1)))/delta lines(x, qt(v, df)) } # do the parametric quartile estimates for the data and plot them Dat <- NULL Dat$x <- x Dat$y <- y deltas <- matrix(0, 3, length(us)) FrankModel <- function(x, delta, mu, sigma, df, tau) { z <- qt(-log(1 - (1 - exp(-delta))/(1 + exp(-delta * pt(x, df)) * ((1/tau) - 1)))/delta, df) mu + sigma * z } for (i in 1:length(us)) { tau = us[i] fit <- nlrq(y ~ FrankModel(x, delta, mu, sigma, df = 8, tau = tau), data = Dat, tau = tau, start = list(delta = 5, mu = 0, sigma = 1), trace = TRUE) lines(x, predict(fit, newdata = x), lty = 2, col = "green") deltas[i, ] <- coef(fit) } # Part 5. ________________________________________________________________________ # To run this part you may have to close R and then re-launch it. # Two approaches to smoothing data using quantiles # 5.A In the first method is a locally polynomial regression library(MASS) data(mcycle) attach(mcycle) plot(times, accel, xlab = "milliseconds", ylab = "acceleration") hs <- c(1, 2, 3, 4) for (i in hs) { h = hs[i] fit <- lprq(times, accel, h = h, tau = 0.5) fit lines(fit$xx, fit$fv, lty = i) } # 5.B A splines approach library(splines) plot(times, accel, xlab = "milliseconds", ylab = "acceleration",type = "n") points(times, accel, cex = 0.75) X <- model.matrix(accel ~ bs(times, df = 15)) for (tau in 1:3/4) { fit <- rq(accel ~ bs(times, df = 15), tau = tau, data = mcycle) accel.fit <- X %*% fit$coef lines(times, accel.fit) } legend(45, -70, c("h=1", "h=2", "h=3", "h=4"),lty = 1:length(hs)) # Part 6. ________________________________________________________________ # Finally, consider fitting an envelope around data, kind of like DEA data(Mammals) attach(Mammals) x <- log(weight) y <- log(speed) plot(x, y, xlab = "Weight in log(Kg)", ylab = "Speed in log(Km/hour)",type = "n") points(x[hoppers], y[hoppers], pch = "h", col = "red") points(x[specials], y[specials], pch = "s", col = "blue") others <- (!hoppers & !specials) points(x[others], y[others], col = "black", cex = 0.75) fit <- rqss(y ~ qss(x, lambda = 1), tau = 0.9) plot(fit)