#Read of the data in rubber.csv
rubber<-read.csv2(file.choose())  # look for rubber.csv
rubber.cc<-rubber[complete.cases(rubber),]#drop out the first ligne
x=rubber.cc$Prices
y=rubber.cc$Return
u=log(y+1)
plot(x,type="p")
plot(y,type="l")
plot(u,type="l")
####histogram of returns and theoretical model
bornes=seq(floor(100*min(y))/100-0.0025,ceiling(max(y)*100)/100+0.0025,0.005)
hist(y,breaks=bornes,col="blue",freq=F)       #too many zeros!
mu=mean(y);mu 
sigma=sd(y);sigma  
magaussienne=function(x){return(dnorm(x,mu,sigma))} 
par(new=TRUE) 
plot(magaussienne,min(y),max(y),col="red",add=TRUE) 

####histogram of logreturns and theoretical model
bornesu=seq(floor(100*min(u))/100-0.0025,ceiling(max(u)*100)/100+0.0025,0.005)
hist(u,breaks=bornesu,col="blue",freq=F)       #too many zeros!
muu=mean(u);muu 
sigmau=sd(u);sigmau  
magaussienne=function(x){return(dnorm(x,muu,sigmau))} 
par(new=TRUE) 
plot(magaussienne,min(u),max(u),col="red",add=TRUE) 

####remove the peak from returns
ysort=sort(y);
ysort;#we see zeros from 555 to 934
length(y);
ycl=c(ysort[1:554],ysort[935:1583]); #we clean y by removing the zeros

####remove the peak from logreturns
usort=sort(u);
usort;#we see zeros from 555 to 934
length(u);
ucl=c(usort[1:554],usort[935:1583]); #we clean y by removing the zeros

hist(ycl,breaks=bornes,col="blue",freq=F)
mu=mean(ycl);mu 
sigma=sd(ycl);sigma  
magaussienne=function(x){return(dnorm(x,mu,sigma))} 
par(new=TRUE) 
plot(magaussienne,min(ycl),max(ycl),col="red",add=TRUE) 

hist(ucl,breaks=bornesu,col="blue",freq=F)
muucl=mean(ucl);muucl 
sigmaucl=sd(ucl);sigmaucl  
magaussienne=function(x){return(dnorm(x,muucl,sigmaucl))} 
par(new=TRUE) 
plot(magaussienne,min(ucl),max(ucl),col="red",add=TRUE) 


qqy=qqnorm(y);
qqline(y,col="red")
plot(qqy$x,qqy$y)
reg=lm(qqy$y~qqy$x)
abline(reg,col="blue")
qql=qqline(y,col="red")

qqu=qqnorm(u);
qqline(u,col="green")
plot(qqu$x,qqu$y)
regu=lm(qqu$y~qqu$x)
abline(regu,col="blue")
qqlu=qqline(u,col="green")


ZZ=function(y)
{qqy=qqnorm(sort(y));
xx=qqy$x;
yy=qqy$y;
mini=1;
maxi=length(xx);
d=maxi-mini;
zz=data.frame(a=mini,b=maxi,rho=RR(xx,yy,mini,maxi))
while #(d>length(y)-10){
     (d>1){
            Rmini=RR(xx,yy,mini+1,maxi) # we drop lowest point 
		Rmaxi=RR(xx,yy,mini,maxi-1) # we drop highest point 
		if(Rmaxi>Rmini) # here we drop highest point
			{newrow=data.frame(a=mini,b=maxi-1,rho=Rmaxi)
			zz=rbind(zz,newrow)
			#ycl=ycl[-right]
			maxi=maxi-1}
		else {newrow=data.frame(a=mini+1,b=maxi,rho=Rmini)
			zz=rbind(zz,newrow)
			#ycl=ycl[-1]
			mini=mini+1}
		d=maxi-mini}   # actually d=d-1
return(zz)
}

plot(ZZ(u)$rho)
plot(ZZ(ucl)$rho)

#####################################
minimaxi=function(ZZ)  # on cherche l'indice i0 du premier maximum de R^2
{i=1;best=ZZ[i,3];
while(ZZ[i+1,3]>=best){best=ZZ[i+1,3];i=i+1}
return(i)}

plotZZ=function(y)
{ZZ2y=ZZ(y)
minimaxiZZ2y=minimaxi(ZZ2y)
LowHigh=ZZ(y)[minimaxiZZ2y,1:2]    # les indices mini et maxi i0
 MyIndices=1:length(y)
 couleur=ifelse((MyIndices>=LowHigh[1,1])&(MyIndices<=LowHigh[1,2])
,"blue","red")
qqq=qqnorm(y)
qqqx=qqq$x
qqqy=qqq$y
plot(sort(qqqx),sort(qqqy),col=couleur)  #  qqnorm colorié
reg=lm(qqqy~qqqx)
abline(reg)
return(ZZ2y[minimaxiZZ2y,])
}
plotZZ(y)
plotZZ(ycl)
plotZZ(u)
plotZZ(ucl)
abline(regu)

yshort=sort(y)[97:1533]  # analyse des rendements tronqués
qqny=qqnorm(yshort)
qqline(yshort)
hist(yshort)

ushort=sort(u)[97:1533]  # analyse des rendements tronqués
qqnu=qqnorm(ushort)
qqline(ushort)
hist(ushort,breaks=bornesu,col="blue",freq=F)
muushort=mean(ushort);muushort 
sigmaushort=sd(ushort);sigmaushort  
magaussienne=function(x){return(dnorm(x,muushort,sigmaushort))} 
par(new=TRUE) 
plot(magaussienne,-0.05,0.05,col="red",add=TRUE) 


muy=mean(yshort);sigy=sd(yshort)   # comparaison avec une simulation normale
samp=rnorm(1533-147,muy,sigy)
qqnorm(samp)

R2=function(y){qqy=
reg=lm(qqnorm(y)$y~qqnorm(y)$x)
return(summary(reg)[[8]][[1]])
}
R2(ycl)