9 Feb 2016 • on

Geary's C and Moran's I functions in R

Gearys’ C and Morans’I autocorrelograms are very common measures of spatial or temporal aucorrelation. Although there are functions to calculate these measures in R, I was surprised that I couldn’t find a perfectly satisfactory solution in any package. The functions used in spdep are based on distances through a neighbourhood matrix instead of straight Eucledian distances. Results obtained from them are therefore highly depdendent on the choice of the number of nearest neighbours.

The functions provided below calculate Geary’s C and Moran’s I autocorrelograms using standard distance classes derived from Eucledian distances.

Given a grid of spatial coordinates {x,y}, a vector of data and a vector of distance classes.

sgrid<-cbind(rep(1:10,10),kronecker(1:10,rep(1,10)))
data<-rnorm(100) #White noise example
dclasses<-1:8

The functions can be called with

gc<-GearyC(sgrid,data,dclasses)
gc
plot(gc)
mI<-MoranI(sgrid,data,dclasses)
mI
plot(mI)

Here are the functions

GearyC <- function(sgrid,data,dclasses){
  
  # Guillaume Larocque 2016
  
  D <- dist(sgrid)
  Dmat<-as.matrix(D)
  if (length(dclasses)==1){ #Based on equal frequencies
    sd<-sort(D)
    ngoal<-floor(length(sd)/dclasses)
    dcl<-0
    for (i in c(1:dclasses)){
      if (i==dclasses){
        tt<-tail(sd,n=1)
      }else{
        tt<-sd[ngoal]
      }
      dcl<-cbind(dcl,sd[tail(which(sd==tt),1)])
      sd<-sd[-(1:ngoal)]
    }
    dclasses<-dcl
  }
  cls<-t(dclasses)
  nd<-length(dclasses)
  if (!is.matrix(data)){
     data=as.matrix(data)
  }
  n<-nrow(data);
  VarC<-Z<-prob<-Nh<-cl<-rep(0,length=nd-1)
  
  data=data.frame(data)
  names(data)='data'
  SPDF <- SpatialPointsDataFrame(coords=sgrid, data=data)
  vario <- variogram(data~1,SPDF, boundaries=cls)
  gC=vario$gamma/var(data);
  # Significance testing
  for (i in (1:(nd-1))){
    iHH<-(Dmat>dclasses[i]) & (Dmat<=dclasses[i+1])
    cl[i]<-mean(Dmat[iHH>0])
    Nh[i]=vario$np[i]*2
    S1=Nh[i]*2
    S2<-sum((2*apply(iHH,1,sum))^2)
    b2<-n*sum((data[,1]-mean(data[,1]))^4)/(sum((data[,1]-mean(data[,1]))^2)^2)
    t1=(n-1)*S1*((n^2-3*n+3)-(n-1)*b2)
    t2=n*(n-2)*(n-3)*(Nh[i]^2)
    t3=-0.25*(n-1)*S2*(n^2+3*n-6-(n^2-n+2)*b2)+Nh[i]^2*(n^2-3+(-(n-1)^2)*b2)
    VarC[i]=t1/t2+t3/t2;
    if (gC[i]>1){
      if (Nh[i]>(4*(n-sqrt(n))) & Nh[i] <= (4*(2*n-3*sqrt(n)+1))) {
        probs<-0.5
        tt<-1
        while (tt>0.000001){
          Z<-(gC[i]+(sqrt(10*probs)*solve(n-1)))/sqrt(VarC[i])
          pr<-pnorm(Z)
          tt<-abs(probs-pr)
          probs<-pr
        }
        prob[i]<-probs
      }else{
        probs<-0.5
        tt<-1
        while (tt>0.000001){
          Z<-(gC[i]-1+solve(n-1))/sqrt(VarC[i])
          pr<-pnorm(-Z)
          tt<-abs(probs-pr)
          probs<-pr
        }
        prob[i]<-probs;
      }
    } else {
      Z[i]<-(gC[i]-1)/sqrt(VarC[i])
      prob[i]<-pnorm(Z[i]);
    }
  }
  setClass("gearyC",slots=c(distances="vector", 
                          gC="vector", 
                          number_pairs="vector",
                          prob.adjust="vector",
                          varC="vector"
           )
  )
  error.bar <- function(x, y, upper, lower=upper, length=0.1,...){
    if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper))
      stop("vectors must be same length")
    arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...)
  }
  
  out=new("gearyC",distances=cl,gC=gC,number_pairs=Nh/2,prob.adjust=p.adjust(prob,'holm'),varC=VarC)
  setMethod("plot", "gearyC",  function (x, xlab ="" , ylab ="" , axes = FALSE , asp =1 ){
    maxd=max(x@distances)
    plot(x@distances,x@gC,xlab='Distance',ylab='Geary\'s C',xlim=c(0, maxd+maxd/20), ylim=c(0, 1.15))
    lines(c(0,maxd+maxd/10),c(1,1))
    title('Geary\'s C')
    error.bar(x@distances,x@gC, 1.96*sqrt(x@varC))
  })
  
  setMethod("show", "gearyC",  function (object) {
    print(data.frame(distances=cl,gC=gC,number_pairs=Nh/2,prob.adjust=p.adjust(prob,'holm'),varC=VarC))
  })
  return(out)
}

MoranI <- function(sgrid,data,dclasses){

# Guillaume Larocque 2016
  
  D <- dist(sgrid)
  Dmat<-as.matrix(D)
  if (length(dclasses)==1){ #Based on equal frequencies
    sd<-sort(D)
    ngoal<-floor(length(sd)/dclasses)
    dcl<-0
    for (i in c(1:dclasses)){
      if (i==dclasses){
        tt<-tail(sd,n=1);
      }else{
        tt<-sd[ngoal];
      }
      dcl<-cbind(dcl,sd[tail(which(sd==tt),1)]);
      sd<-sd[-(1:ngoal)];
    }
    dclasses<-dcl;
  }
  cls<-t(dclasses);
  nd<-length(dclasses);
  if (!is.matrix(data)){
    data=as.matrix(data)
  }
  n<-nrow(data);
  Nh<-I<-cl<-VarI<-Z<-prob<-rep(0,length=nd-1)
  
  # Significance testing
  for (i in (1:(nd-1))){
    iHH<-(Dmat>dclasses[i]) & (Dmat<=dclasses[i+1])
    md<-mean(data[,1])
    prodd<-(t(data[,1]-md) %*% iHH %*% (data[,1]-md))/2
    Nh[i]<-sum(iHH>0)/2
    I[i]<-(1 / Nh[i])*prodd / (var(data[,1])*(n-1)/n)
    cl[i]<-mean(Dmat[iHH>0])
    Nh[i]<-Nh[i]*2
    S1<-Nh[i]*2
    S2<-sum((2*apply(iHH,1,sum))^2)
    b2<-n*sum((data[,1]-mean(data[,1]))^4)/(sum((data[,1]-mean(data[,1]))^2)^2)
    t1<-n*((n^2-3*n+3)*S1-n*S2+3*Nh[i]^2)
    t2<-b2*((n^2-n)*S1-2*n*S2+6*Nh[i]^2)
    t3<-(n-1)*(n-2)*(n-3)*(Nh[i]^2)
    VarI[i]<-((t1-t2)/t3)-solve((n-1)^2)
    if (Nh[i]>(4*(n-sqrt(n))) & Nh[i] <= (4*(2*n-3*sqrt(n)+1))) {
      if (I[i]<0){
        probs<-0.5
        tt=1
        while (tt>0.000001){
          Z<-(I[i]+(sqrt(10*probs)*solve(n-1)))/sqrt(VarI[i])
          pr<-pnorm(Z)
          tt<-abs(probs-pr)
          probs<-pr
        }
        prob[i]<-probs
      }else{
        probs<-0.5
        tt<-1
        while (tt>0.000001){
          Z<-(I[i]+(sqrt(10*probs)*solve(n-1)))/sqrt(VarI[i])
          pr<-pnorm(-Z)
          tt<-abs(probs-pr)
          probs<-pr
        }
        prob[i]<-probs;
      }
    } else {
      Z[i]<-(I[i]+solve(n-1))/sqrt(VarI[i])
      if (I[i]<0){
        prob[i]<-pnorm(Z[i]);
      }else{
        prob[i]<-pnorm(-Z[i]);
      }
    }
  }
  setClass("moranI",slots=c(distances="vector", 
                            mI="vector", 
                            number_pairs="vector",
                            prob.adjust="vector",
                            varI="vector"
  )
  )
  error.bar <- function(x, y, upper, lower=upper, length=0.1,...){
    if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper))
      stop("vectors must be same length")
    arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...)
  }
  
  out=new("moranI",distances=cl,mI=I,number_pairs=Nh/2,prob.adjust=p.adjust(prob,'holm'),varI=VarI)
  setMethod("plot", "moranI",  function (x, xlab ="" , ylab ="" , axes = FALSE , asp =1 ){
    maxd=max(x@distances)
    maxy=max(x@mI)
    plot(x@distances,x@mI,xlab='Distance',ylab='Moran\'s I',xlim=c(0, maxd+maxd/20), ylim=c(-0.15, 1.15))
    title('Moran\'s I')
    lines(c(0,maxd+maxd/10),c(0,0))
    error.bar(x@distances,x@mI, 1.96*sqrt(x@varI))
  })
  
  setMethod("show", "moranI",  function (object) {
    print(data.frame(distances=cl,mI=I,number_pairs=Nh/2,prob.adjust=p.adjust(prob,'holm'),varI=VarI))
  })
  return(out)
}