#Read of the data in rubber.csv rubber<-read.csv2(file.choose()) # look for rubber.csv rubber.cc<-rubber[complete.cases(rubber),]#drop out the first ligne x=rubber.cc$Prices y=rubber.cc$Return u=log(y+1) plot(x,type="p") plot(y,type="l") plot(u,type="l") ####histogram of returns and theoretical model bornes=seq(floor(100*min(y))/100-0.0025,ceiling(max(y)*100)/100+0.0025,0.005) hist(y,breaks=bornes,col="blue",freq=F) #too many zeros! mu=mean(y);mu sigma=sd(y);sigma magaussienne=function(x){return(dnorm(x,mu,sigma))} par(new=TRUE) plot(magaussienne,min(y),max(y),col="red",add=TRUE) ####histogram of logreturns and theoretical model bornesu=seq(floor(100*min(u))/100-0.0025,ceiling(max(u)*100)/100+0.0025,0.005) hist(u,breaks=bornesu,col="blue",freq=F) #too many zeros! muu=mean(u);muu sigmau=sd(u);sigmau magaussienne=function(x){return(dnorm(x,muu,sigmau))} par(new=TRUE) plot(magaussienne,min(u),max(u),col="red",add=TRUE) ####remove the peak from returns ysort=sort(y); ysort;#we see zeros from 555 to 934 length(y); ycl=c(ysort[1:554],ysort[935:1583]); #we clean y by removing the zeros ####remove the peak from logreturns usort=sort(u); usort;#we see zeros from 555 to 934 length(u); ucl=c(usort[1:554],usort[935:1583]); #we clean y by removing the zeros hist(ycl,breaks=bornes,col="blue",freq=F) mu=mean(ycl);mu sigma=sd(ycl);sigma magaussienne=function(x){return(dnorm(x,mu,sigma))} par(new=TRUE) plot(magaussienne,min(ycl),max(ycl),col="red",add=TRUE) hist(ucl,breaks=bornesu,col="blue",freq=F) muucl=mean(ucl);muucl sigmaucl=sd(ucl);sigmaucl magaussienne=function(x){return(dnorm(x,muucl,sigmaucl))} par(new=TRUE) plot(magaussienne,min(ucl),max(ucl),col="red",add=TRUE) qqy=qqnorm(y); qqline(y,col="red") plot(qqy$x,qqy$y) reg=lm(qqy$y~qqy$x) abline(reg,col="blue") qql=qqline(y,col="red") qqu=qqnorm(u); qqline(u,col="green") plot(qqu$x,qqu$y) regu=lm(qqu$y~qqu$x) abline(regu,col="blue") qqlu=qqline(u,col="green") ZZ=function(y) {qqy=qqnorm(sort(y)); xx=qqy$x; yy=qqy$y; mini=1; maxi=length(xx); d=maxi-mini; zz=data.frame(a=mini,b=maxi,rho=RR(xx,yy,mini,maxi)) while #(d>length(y)-10){ (d>1){ Rmini=RR(xx,yy,mini+1,maxi) # we drop lowest point Rmaxi=RR(xx,yy,mini,maxi-1) # we drop highest point if(Rmaxi>Rmini) # here we drop highest point {newrow=data.frame(a=mini,b=maxi-1,rho=Rmaxi) zz=rbind(zz,newrow) #ycl=ycl[-right] maxi=maxi-1} else {newrow=data.frame(a=mini+1,b=maxi,rho=Rmini) zz=rbind(zz,newrow) #ycl=ycl[-1] mini=mini+1} d=maxi-mini} # actually d=d-1 return(zz) } plot(ZZ(u)$rho) plot(ZZ(ucl)$rho) ##################################### minimaxi=function(ZZ) # on cherche l'indice i0 du premier maximum de R^2 {i=1;best=ZZ[i,3]; while(ZZ[i+1,3]>=best){best=ZZ[i+1,3];i=i+1} return(i)} plotZZ=function(y) {ZZ2y=ZZ(y) minimaxiZZ2y=minimaxi(ZZ2y) LowHigh=ZZ(y)[minimaxiZZ2y,1:2] # les indices mini et maxi i0 MyIndices=1:length(y) couleur=ifelse((MyIndices>=LowHigh[1,1])&(MyIndices<=LowHigh[1,2]) ,"blue","red") qqq=qqnorm(y) qqqx=qqq$x qqqy=qqq$y plot(sort(qqqx),sort(qqqy),col=couleur) # qqnorm colorié reg=lm(qqqy~qqqx) abline(reg) return(ZZ2y[minimaxiZZ2y,]) } plotZZ(y) plotZZ(ycl) plotZZ(u) plotZZ(ucl) abline(regu) yshort=sort(y)[97:1533] # analyse des rendements tronqués qqny=qqnorm(yshort) qqline(yshort) hist(yshort) ushort=sort(u)[97:1533] # analyse des rendements tronqués qqnu=qqnorm(ushort) qqline(ushort) hist(ushort,breaks=bornesu,col="blue",freq=F) muushort=mean(ushort);muushort sigmaushort=sd(ushort);sigmaushort magaussienne=function(x){return(dnorm(x,muushort,sigmaushort))} par(new=TRUE) plot(magaussienne,-0.05,0.05,col="red",add=TRUE) muy=mean(yshort);sigy=sd(yshort) # comparaison avec une simulation normale samp=rnorm(1533-147,muy,sigy) qqnorm(samp) R2=function(y){qqy= reg=lm(qqnorm(y)$y~qqnorm(y)$x) return(summary(reg)[[8]][[1]]) } R2(ycl)