############################################################################################### # Ecology > Functional Ecology > Functionnal Diversity (FD) index calculation # # Calculation of FD according to the Oikos paper : # # Mouchet M., Guilhaumon F., Villéger S., Mason N., Tomasini J. A., Mouillot D. # Towards a consensus for calculating dendrogram-based functional diversity indices. # 2008. OIKOS (Volume 117 Issue 5, Pages 794 - 800) # # Code by François Guilhaumon (francois.guilhaumon@univ-montp2.fr) # Xtree, Getlength.inner and Getlength functions by Owen Petchey (o.petchey@sheffield.ac.uk) # # Please reffer to upcited paper as appropriate ############################################################################################## ############################################################################################## # UPDATES # # 7 april 2008 : added $ultram to the result list # 25 June 2013 : change in input format # ############################################################################################## GFD_matcomm <- function(mat,comp_comm=NA,distances=NA,dissimMethod="cophenetic",methods=NA,verb=FALSE) { ##################################################################### # Functional Diversity (FD) index calculation # # # # Required packages : cluster, clue, gtools # # # # Arguments : # # mat : a sXt species/traits matrix (with species labels for # # communities FD calculations) # # comp_comm : a matrix communitiesXspecies (presence=1, absence=0) # # with same species labels than in mat # # distances : distance(s) to use (NA means all) # # methods : clustering method(s) to use (NA means all) # # dissimMethod : how dissimilarities are calculated ( # # argument 'method' of the function cl_dissimilarity # # (package clue)) # # verb : should verbosity be activated ? # ##################################################################### require(gtools) require(cluster) require(clue) ######################################################## # Xtree and GetLength function by Owen Petchey # ######################################################## Xtree <- function(tree){ species.names <- tree$labels merge <- tree$merge height <- tree$height s <- nrow(merge)+1 nodeid <- matrix(integer(s*(s-1)), s, s-1) height1 <- matrix(double(s*(s-1)), s, s-1) branch <- matrix(double(s*(s-1)), s, s-1) branchid <- matrix(character(s*(s-1)), s, s-1) for(i in (s-1):1) for(j in 1:2){ goto <- matrix(logical(i*2), i, 2) goto[i,j] <- 1; for(k in i:1) for(l in 1:2){ speciescount <- 0 if(goto[k,l] == 1){ if(merge[k,l] > 0){ goto[merge[k,l],1] <- 1 goto[merge[k,l],2] <- 1 } if(merge[k,l] < 0){ nodeid[-merge[k,l],i] <- i height1[-merge[k,l],i] <- height[i] } } } } for(i in 1:s){ last <- 0 for(j in 1:(s-1)){ if(nodeid[i,j] > 0){ branch[i,j] <- height1[i,j] - last last <- height1[i,j] } } } for(n in 1:(s-1)) for(i in 1:s){ if(nodeid[i,n] == 0) branchid[i,n] <- paste("0", sep="-") if(nodeid[i,n] > 0){ if(n == 1) branchid[i,n] <- paste(i, "0", nodeid[i,n], sep="-") if(n > 1){ j <- 1 while(j < n && nodeid[i,n-j] == 0) j <- j+1 if(j == n) j <- j - 1 if(nodeid[i,n-j] == 0) branchid[i,n] <- paste(i, nodeid[i,n-j], nodeid[i,n], sep="-") else branchid[i,n] <- paste(nodeid[i,n-j], nodeid[i,n], sep="-") } } } unibranch <- unique(branchid[branchid != "0"]) H1 <- matrix(integer(s*(2*s-2)), s, 2*s-2) h2.prime <- double(2*s-2) for(i in 1:s) for(j in 1:(s-1)) for(k in 1:(2*s-2)) if(unibranch[k] == branchid[i,j]) h2.prime[k] <- branch[i,j] names(h2.prime) <- unibranch for(i in 1:s) for(j in 1:(s-1)) for(k in 1:(2*s-2)) if(branchid[i,j] == unibranch[k]) H1[i,k] <- 1 dimnames(H1) <- list(species.names,unibranch) xtree <- list("h2.prime"=h2.prime, "H1"=H1) class(xtree) <- "xtree" xtree } Getlength.inner <- function(xtree){ if(!is.matrix(xtree[[2]])) result <- 0 if(is.matrix(xtree[[2]])) result = sum(xtree[[1]][colSums(xtree[[2]]) != 0 & colSums(xtree[[2]]) < length(xtree[[2]][,1])]) result } Getlength <- function(xtree, comp=NA){ if(!is.data.frame(comp)) result <- Getlength.inner(xtree) if(is.data.frame(comp)){ S <- tapply(comp[,2], comp[,1], function(x) length(x)) FD <- tapply(comp[,2], comp[,1], function(x) Getlength.inner(list(xtree[[1]], xtree[[2]][!is.na(match(dimnames(xtree[[2]])[[1]], x)),]))) #FD <- FD/Getlength.inner(xtree) result <- data.frame(S=S, FD=FD) } result } ######################################### #End of functions coded by Owen Petchey # ######################################### options(warn=-1) # creating a table with the list of all species occurences, using the format cbind(communities_labels,species_labels) comp<-comp_comm if (is.na(comp_comm)==F) { comp<-matrix(0,sum(comp_comm),2) flag<-1 for (i in row.names(comp_comm)) for (j in colnames(comp_comm)) { if (comp_comm[i,j]==1) { comp[flag,]<-c(i,j) flag<-flag+1 } #end of if } #end of i,j # comp<-as.data.frame(comp) } # end of if communities #Vector of available distances supported.dists = c("gower", "euclidean") #Vector of available methods (as named in the clue package) named as in the paper supported.methods = c(ward="ward.D2",single="single",complete="complete",UPGMA="average",UPGMC="centroid",WPGMC="median",WPGMA="mcquitty") # "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC) #Arguments tests, distances and methods selection if(!is.na(distances)) { if(prod(is.element(distances,supported.dists))!=1) stop("distances should be in 'gower' or 'euclidean'") Distances = distances }else{ Distances = supported.dists } if(dissimMethod=="spectral") if(length(Distances)>1) stop("You cannot use the 'spectral' method to compare trees calculated with different distances, use the 'cophenetic' method instead.") if (verb) cat("#################################################\n# Functional Diversity (FD) index calculation\n") if (verb) cat("#--\n# Distances are : ",Distances,"\n") if(!is.na(methods)) { if(prod(is.element(methods,names(supported.methods)))!=1) stop("methods should be in 'ward','single','complete','UPGMA','UPGMC','WPGMC' or 'WPGMA'") Methods = supported.methods[is.element(names(supported.methods),methods)] }else{ Methods = supported.methods } if (verb) cat("# Methods are : ",names(Methods),"\n") if(!is.na(comp)) { if(!is.data.frame(comp)) stop("'communities compositions' should be a data.frame ") comp=comp }#end of if(!is.na(comp)) #number of methods nbMethods = length(Methods) #list of method combinations methodList = lapply(seq(1:nbMethods),function(index) combinations(nbMethods,index,Methods)) names(methodList) = paste("Comb",seq(1:length(methodList)),sep="_") #scaling the trait matrix id numeric if(is.numeric(mat)) {mat=scale(mat)} #Creating a list of distances matrix dMatList = sapply(Distances,function(distance) daisy(mat,metric=distance,stand=F) , simplify=F ) #Creating a list containing all the trees for each distance calcTreesForADistance = function(dist,...) { dmat = dMatList[[dist]] #growing trees res=sapply(Methods,function(met) hclust(dmat,met) , simplify = FALSE) #naming trees with arguments methods #names(res) = available.methods[match(Methods,methods.corresp)] res }#end of calcTreesForADistance if (verb) cat("#--\n# Simple trees calculation ... \n") treeList = sapply(Distances,calcTreesForADistance,dMatList=dMatList ,simplify = FALSE) if (verb) cat("# Simple trees calculated ! \n") #Calculating dissimilarities for simple trees safe.cl_ultrametric <- function(x){ tryCatch(cl_ultrametric(x),error=function(e){ #cat("the tree present topological problems and has been dropped !\n") #X11() #plot(x) return(NA) }) }#eo safe.ultrametric calcDissimilarityForATree <- function(tree,distance) { if(any(is.na(tree))) Inf ult <- safe.cl_ultrametric(tree) if(is.na(ult)){ Inf }else{ cl_dissimilarity(ult,dMatList[[distance]],method=dissimMethod) } } calcDissimilaritiesForADistance <- function(distance) { #cat(distance,"\n") r <- sapply(treeList[[distance]],calcDissimilarityForATree,distance=distance) names(r) <- Methods[names(treeList[[distance]])] r } dissimTrees = sapply(Distances,calcDissimilaritiesForADistance) if (verb) cat("# Dissimilarities for simple trees calculated ! \n") #Getting the best simple Tree if(length(Methods)==1 && length(Distances)==1){ bestTreeValue = dissimTrees[[1]] bestTreeMethod = Methods bestTreeDistance = Distances }else { if(length(Methods)==1) { aa = as.matrix(dissimTrees) bestTreeNum = which.min(aa) bestTreeValue = aa[bestTreeNum][[1]] bestTreeMethod = names(Methods) bestTreeDistance = substr(rownames(aa)[bestTreeNum],1,nchar(rownames(aa)[bestTreeNum])-(nchar(names(Methods))+1)) }else { bestTreeNum = which.min(dissimTrees) bestTreeValue = dissimTrees[bestTreeNum][[1]] bestTreeMethod = names(Methods)[Methods==rep(rownames(dissimTrees),length(Distances))[bestTreeNum]] bestTreeDistance = t(replicate(length(Methods),colnames(dissimTrees)))[bestTreeNum] #rep(colnames(dissimTrees),length(Methods)) }#end of if/else }#end of if/else #Calculating and processing the consensuses if needed calConsensusForARow = function(row,distance){ #cat(distance,"\n") #cat("## ",row,"\n") if(any(dissimTrees[row,distance]==Inf)) return(NA) #extracting the methods to use in the consensus methods.in.consensus = names(supported.methods[is.element(supported.methods,row)]) #constructing an ensemble of trees with the methods combination hens = cl_ensemble(list=lapply(paste("treeList$",distance,"$",methods.in.consensus,sep=""),function(x)eval(parse(text=x)))) #naming the ensemble names(hens) = methods.in.consensus #constructing and returning the corresponding consensus cl_consensus(hens, method="majority", weights=1, control=list(p=1)) }#end of calConsensusForARow if (length(Methods) > 1) { if (verb) cat("#--\n# Consensuses calculation ...\n") consMethodList = methodList[-1] calConsensusForADistance = function(distance) lapply(consMethodList,function(mat) apply(mat,1,calConsensusForARow,distance=distance)) consensuses = sapply(Distances,calConsensusForADistance,simplify=F) if (verb) cat("# Consensuses calculated ! \n") #Calculating dissimilarities for consensuses using an ugly loop ... dissimCons = list() for (g in 1:length(Distances)) { dissimCons[[g]] <- list() for (i in 1:length(consMethodList)) { dissimCons[[g]][[i]] <- list() for (j in 1:dim(consMethodList[[i]])[1]) { dissimCons[[g]][[i]][[j]] = calcDissimilarityForATree(consensuses[[g]][[i]][[j]],Distances[g]) }#end of for j }#end of for i }#end of for g names(dissimCons) = Distances #Getting the best consensus minCons=999999999999999 bestConsDistance = character() bestConsMethods = character() bestCons = NULL posConsMin=integer() for (i in 1:length(dissimCons)) { for (j in 1:length(dissimCons[[i]])) { for (k in 1:length(dissimCons[[i]][[j]])) { x = dissimCons[[i]][[j]][[k]] #cat(x,"\n") if (!is.na(x)) if(x < minCons) {minCons=x ; bestConsDistance = Distances[i] ; bestConsMethods = names(Methods)[is.element(Methods,consMethodList[[j]][k,])] ; bestCons = consensuses[[i]][[j]][[k]]} }#end of for k }#end of for j }#end of for i if (verb) cat("# Dissimilarities for consensuses calculated ! \n") }#end of if length(Methods) > 1 res1 = list() cons=FALSE if (length(Methods) > 1) { #chossing the best : simple tree or consensus if(minCons < bestTreeValue) { cons=TRUE res1 = list(dissim=minCons,distance=bestConsDistance,methods= bestConsMethods,tree=as.hclust(bestCons),ultram=cl_ultrametric(as.hclust(bestCons)),data=mat,comp=comp,cons=cons) }else { res1 = list(dissim=bestTreeValue,distance=bestTreeDistance,methods= bestTreeMethod,tree=treeList[[bestTreeDistance]][[bestTreeMethod]],ultram=cl_ultrametric(treeList[[bestTreeDistance]][[bestTreeMethod]]),data=mat,comp=comp,cons=cons) }#end of if minCons < bestTreeValue } else { #the best is the best simple tree res1 = list(dissim=bestTreeValue,distance=bestTreeDistance,methods= bestTreeMethod,tree=treeList[[bestTreeDistance]][[names(bestTreeMethod)]],ultram=cl_ultrametric(treeList[[bestTreeDistance]][[names(bestTreeMethod)]]),data=mat,comp=comp,cons=cons) }#end of if/else length(Methods) > 1 #Xtree for the best bestXtree = Xtree(as.hclust(res1$tree)) # FD is the sum of the product of i.prime and h2.prime FD <- Getlength(bestXtree,comp=comp) if (is.na(comp_comm)==F) { FD<-FD[row.names(comp_comm),] } # communities in the same order than in comp_comm res2 = c(res1,FD=FD,valid=TRUE) options(warn=0) if (dissimMethod=="cophenetic"){ res2$limit <- .36 if (verb==T & res1$dissim > .36) { res2$valid=FALSE ; cat("#-----------------------------------\n# Warning : Dissimilarity between initial distance and ultrametric distance matrices is too high ! \n# Be carrefull in FD interpretation !\n") } } if (dissimMethod=="spectral"){ sigma2 <- var(dMatList[[res1$distance]]) + var(cl_ultrametric(res1$tree)) threshold <- 2*sqrt(sigma2)*sqrt(dim(mat)[1]) res2$limit <- threshold if(verb==T & res1$dissim > threshold) {res2$valid=FALSE ; cat("#-----------------------------------\n# Warning : Dissimilarity between initial distance and ultrametric distance matrices is too high ! \n# Be carrefull in FD interpretation !\n") } } if(verb) cat("#--\n# Best tree as a dissimilarity value of : ",res1$dissim,",\n") if(verb) if(cons) cat("# this resulting from a consensus,\n") if (verb) cat("# it is constructed with the ",res1$distance," distance\n") if (verb) cat("# and the following clustering method(s) : ",unlist(res1$methods),"\n") if(verb) if(!is.data.frame(comp)) {cat("#--\n# FD : ",FD,"\n")}else {cat("#--\n# FD : ",res2$FD.FD,"\n") } if(verb) cat("#########################################################\n") invisible(res2) }#end of GFD function