diff --git a/Data visualization/Bivariate plots.R b/Iceberg Code/Old Code/Data visualization/Bivariate plots.R similarity index 100% rename from Data visualization/Bivariate plots.R rename to Iceberg Code/Old Code/Data visualization/Bivariate plots.R diff --git a/SDM pipeline analytics/AUC statistics and figure for SDMs.R b/Iceberg Code/Old Code/SDM pipeline analytics/AUC statistics and figure for SDMs.R similarity index 100% rename from SDM pipeline analytics/AUC statistics and figure for SDMs.R rename to Iceberg Code/Old Code/SDM pipeline analytics/AUC statistics and figure for SDMs.R diff --git a/SDM pipeline analytics/Climate extinction checker.R b/Iceberg Code/Old Code/SDM pipeline analytics/Climate extinction checker.R similarity index 100% rename from SDM pipeline analytics/Climate extinction checker.R rename to Iceberg Code/Old Code/SDM pipeline analytics/Climate extinction checker.R diff --git a/SDM pipeline analytics/Continent maker.R b/Iceberg Code/Old Code/SDM pipeline analytics/Continent maker.R similarity index 100% rename from SDM pipeline analytics/Continent maker.R rename to Iceberg Code/Old Code/SDM pipeline analytics/Continent maker.R diff --git a/SDM pipeline analytics/Futures pipeline JustReductions.R b/Iceberg Code/Old Code/SDM pipeline analytics/Futures pipeline JustReductions.R similarity index 100% rename from SDM pipeline analytics/Futures pipeline JustReductions.R rename to Iceberg Code/Old Code/SDM pipeline analytics/Futures pipeline JustReductions.R diff --git a/SDM pipeline analytics/lulcdrops.R b/Iceberg Code/Old Code/SDM pipeline analytics/lulcdrops.R similarity index 100% rename from SDM pipeline analytics/lulcdrops.R rename to Iceberg Code/Old Code/SDM pipeline analytics/lulcdrops.R diff --git a/Splinter analyses/Generate BIOCLIM 1 and 4 from ERA5.R b/Iceberg Code/Old Code/Splinter analyses/Generate BIOCLIM 1 and 4 from ERA5.R similarity index 100% rename from Splinter analyses/Generate BIOCLIM 1 and 4 from ERA5.R rename to Iceberg Code/Old Code/Splinter analyses/Generate BIOCLIM 1 and 4 from ERA5.R diff --git a/Splinter analyses/Resample ERA5 to QDS resolution for side analysis of SDMs.R b/Iceberg Code/Old Code/Splinter analyses/Resample ERA5 to QDS resolution for side analysis of SDMs.R similarity index 100% rename from Splinter analyses/Resample ERA5 to QDS resolution for side analysis of SDMs.R rename to Iceberg Code/Old Code/Splinter analyses/Resample ERA5 to QDS resolution for side analysis of SDMs.R diff --git a/SDM Code/1ViralChatter_v4.r b/SDM Code/1ViralChatter_v4.r new file mode 100644 index 0000000..acfd44f --- /dev/null +++ b/SDM Code/1ViralChatter_v4.r @@ -0,0 +1,289 @@ +# !note that i never automated the copying of future layers tthat needed to be processed +#==================================================================== +#== 1. Install packages +#==================================================================== + +rm(list=ls()) +library(devtools) +if(Sys.info()['user']=='ctg') library(colorout) + +library(BIENWorkflow) +library(cmsdm) +library(ggplot2) +library(gridExtra) +library(textTinyR) +library(parallel) + +mc.cores=1 + +#==================================================================== +#== 2. Create directories +#==================================================================== + +runName='VC_12_23_21' + +#isRares=length(grep('Rare',runName))>0 + +redoSort=FALSE; whichSpecies='all' + +if(Sys.info()['user']=='ctg') { + #baseDir=paste0('/Users/ctg/Documents/SDMs/ViralChatter/Revision2/', runName,'_inputs') + baseDir=paste0('/Volumes/cm2/ViralChatter/', runName,'_inputs') +} + +# these use the demo data included in the package +mySpeciesDir='/Users/ctg/Dropbox/Projects/Viral_Chatter/Data/Masked_Presence_Data_By_Expert_Maps_R2' +#'/Users/ctg/Dropbox/Projects/Viral_Chatter/Data/PresenceData' +#myEnvDir= '/Volumes/cm2/ViralChatter/Revision1/CurrentEnv' +myEnvDir='/Volumes/cm2/viralChatterEnv_R2/Present' +otherDirs=list( + rdistDir=paste0(baseDir,'/rDists'), + domains=paste0(baseDir,'/domains')) + +#sortDirNames=c('PPM','Points','RangeBag')#c('Samples1','Samples3','Samples10') +#myAlgorithmNames=c('PPM','Points','RangeBag') +myAlgorithmNames=c('PPM','Points','RangeBag') +sortDirNames=c('Points','RangeBag','SDM','SDM10_20') +offsetDataDirNames=c('Expert_Maps') +myCustomBackgroundDir=NULL +myCustomBackgroundTable=NULL +# these should be stored as a single stack, in the same order as the env +#myOtherEnvDir= '/Volumes/cm2/ViralChatter/Revision1/OtherEnv' +myOtherEnvDir= '/Volumes/cm2/ViralChatter/fix_ip2611' +#'/Volumes/cm2/viralChatterEnv_R2/Future' +myCustomDomainRegionRaster='/Users/ctg/Dropbox/Projects/Viral_Chatter/Data/continents-final.tif' +#myCustomDomainRegionRaster='/Users/ctg/Dropbox/Projects/Viral_Chatter/Data/EcoregionRasterDinnerstein2017_QD_global.tif' +mySamplingModelDir=NULL +mySamplingDataDir=NULL +myOffsetDirs=NULL +samplingDataDirNames=NULL +customBackgroundTablePath=NULL +# run once then move other env over and rerun +allBaseDirs=sdmDirectories(baseDir, + myAlgorithmNames=myAlgorithmNames, + warn=FALSE, + mySpeciesDir=mySpeciesDir, + myEnvDir=myEnvDir, + myOffsetDirs=myOffsetDirs, + mySamplingDataDir=mySamplingDataDir, + mySamplingModelDir=mySamplingModelDir, + myCustomBackgroundDir=myCustomBackgroundDir, + myCustomBackgroundTable= myCustomBackgroundTable, + myCustomDomainRegionRaster= myCustomDomainRegionRaster, + myOtherEnvDir=myOtherEnvDir, + offsetDataDirNames=offsetDataDirNames, + samplingDataDirNames=samplingDataDirNames, + overwrite=FALSE, + sortDirNames=sortDirNames, + otherDirs=otherDirs) + +#==================================================================== +#== 3. Prep environmental layers +#==================================================================== + +if(Sys.info()['user']=='ctg') { # for cory only + #source('/Users/ctg/Dropbox/Projects/Viral_Chatter/Code/2ViralChatter_v2.r')\ + myprj = CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0") + env=stack(paste0(allBaseDirs$envDir,'/AllEnv.tif')) + layerNames=try(read.csv(list.files(allBaseDirs$envDir,'.csv',full.names=T), stringsAsFactors=F,header=F)) + world.shp=readOGR( '/Users/ctg/Dropbox/Projects/Climate_Horizons/SpeciesHorizons/HorizonsMaster/rangeHorizonsAnalysis/Model_Analysis/TM_WORLD_BORDERS_SIMPL-0.3/TM_WORLD_BORDERS_SIMPL-0.3.shp') + #world.shp=spTransform(world.shp,projection(env)) + pointsProj= CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0") + + ## aggregate env layer for subsampling very large species (rather than running the spThin algorithm) + print('prepped coarse grid for thinning') + coarse.grid.file=paste0(allBaseDirs$miscDir, '/coarse_grid_for_thinning.tif') + if(!file.exists(coarse.grid.file)){ + env.template=env[[1]] + coarse.grid=aggregate(env.template,fact=4) + writeRaster(coarse.grid,file=coarse.grid.file,overwrite=T) + } +} +save(allBaseDirs,file=paste0(baseDir,'/allBaseDirs.rdata')) + +#==================================================================== +#== 4. Sort species +#==================================================================== + +#-------------------------------------------------------------------- +### 4.a sort species by algorithm +#-------------------------------------------------------------------- +sortDone=checkSortDone(allBaseDirs, deleteSorted=FALSE) +# here i manually copy the files from the last run to save time +sortSpeciesBulk(allBaseDirs,pointsProj,myprj=myprj,doThin=FALSE, thinCutoff=NULL,verbose=TRUE,overwrite=FALSE,doClean=TRUE,doParallel=T, mc.cores=mc.cores) + +#cm: 10/23/21 ditched this to see if thinning will address the oversampling issues +if(redoSort){ sortSpeciesFast(allBaseDirs, + pointsProj=CRS('+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'), + myprj=myprj, + verbose=TRUE, + overwrite=TRUE, + mc.cores=10, + sampleCutoffs=c(1,3,10), + whichSpecies=whichSpecies)} + +if(length(sortDone$notSorted)>1 & !all(is.na(sortDone$notSorted))) sortSpeciesBulk(allBaseDirs,pointsProj=pointsProj,myprj=myprj,doThin=TRUE,thinCutoff=25,maxCells=20000,verbose=TRUE,overwrite=FALSE,doClean=TRUE,mc.cores=mc.cores) +#-------------------------------------------------------------------- +### 4.b specify which species to run +#-------------------------------------------------------------------- +speciesList=c(list.files(file.path(allBaseDirs$speciesDir,'SDM'), full.names=T),list.files(file.path(allBaseDirs$speciesDir,'SDM10_20'), full.names=T)) +done=tools::file_path_sans_ext(basename(gsub('__Maps__v1','', list.files(allBaseDirs$algorithmDirs$PPM$figureDir, full.names=TRUE)))) +keep=!tools::file_path_sans_ext(basename(speciesList)) %in% done +#done=checkSDMDone(allBaseDirs,speciesList,algorithm='PPM') +#str(done) +speciesList=speciesList[keep] +# speciesList=done$problems + + +#==================================================================== +#== 5. Model settings +#==================================================================== +cbs=commonBIENSettings(myprj,world.shp,runName=runName) +toDo=cbs$toDo +toSave=cbs$toSave +toOverwrite=cbs$toOverwrite + +toDo$background$customBG=FALSE +toDo$background$makeBiasedBackground=FALSE + +toDo$misc$algorithm='PPM' # must match name specified in myAlgorithmNames +toDo$sampling$samplingSettings=c('noBias') # note that the target group may give wierd predictions for the demo data, since its just a few species +toDo$fitting$expertSettings=list(prob=1e-6,rate=0,skew=1e-6,shift=0) +toDo$fitting$predictorSettings=NULL +toDo$fitting$priorSettings=NULL +toDo$fitting$algorithmSettings=c('maxnet') + +# these are changes to the defaults +toDo$transfer$whichFutures='all' +toDo$transfer$otherEnvProjections=T +toDo$plotting$plotOtherEnvPred=T +toDo$plotting$whichFuturesToPlot='ip' + +toSave$domain=FALSE +toDo$domain$domainTrimMethod='ecoregionRaster' +toDo$domain$coarseCrop=FALSE +toDo$domain$fixWeirdDisjunctEcoregions=FALSE +toDo$trimDomainMask=FALSE +toDo$background$maskEvalBGByBufferedPresences=FALSE +toDo$projection$maskToOccEcoNeighbors=FALSE +toDo$evaluation$evaluationModel='fullDomain' +toDo$metadata$bienMetadata=FALSE +toDo$fitting$maxTime=2200 +toDo$presences$pvalSet=1e-9 # guessing at a very conservative approach to keep data. + +# write out run setting used for metadata +runSettings=list(toDo,toSave,toOverwrite) +# 11/1/20 I overwrote the orignial run stuff so new BIEN defaults may have replaced the real settings. probably just about thresholding +save(runSettings,file=paste0(dirname(allBaseDirs$envDir),'/runSettings.rdata')) + +#-------------------------------------------------------------------- +### 6. Run SDM Models +#-------------------------------------------------------------------- +# 200-300 s per species usually +j=13; (speciesCSV=speciesList[j]); +mclapply(length(speciesList):1, function(j){ #length(speciesList) + print(j) + out=sdmBIEN(speciesCSV=speciesList[j], + allBaseDirs, + toDo=toDo, + toSave=toSave, + toOverwrite=toOverwrite) + if(class(out)=='try-error') print(out) + gc() +},mc.cores=mc.cores) + +#------------------------------------------------------------ +### 7. Run range bagging for all species +#------------------------------------------------------------ +myCustomDomainRegionRaster=='/Users/ctg/Dropbox/Projects/Viral_Chatter/Data/EcoregionRasterDinnerstein2017_QD_global.tif' +allBaseDirs2=sdmDirectories(baseDir, + myAlgorithmNames=myAlgorithmNames, + warn=FALSE, + mySpeciesDir=mySpeciesDir, + myEnvDir=myEnvDir, + myOffsetDirs=myOffsetDirs, + mySamplingDataDir=mySamplingDataDir, + mySamplingModelDir=mySamplingModelDir, + myCustomBackgroundDir=myCustomBackgroundDir, + myCustomBackgroundTable= myCustomBackgroundTable, + myCustomDomainRegionRaster= myCustomDomainRegionRaster, + myOtherEnvDir=myOtherEnvDir, + offsetDataDirNames=offsetDataDirNames, + samplingDataDirNames=samplingDataDirNames, + overwrite=FALSE, + sortDirNames=sortDirNames, + otherDirs=otherDirs) + +isRares=T +# if(isRares) +speciesListHull=speciesList=c(list.files(file.path(allBaseDirs$speciesDir,'RangeBag'),full.names=T)) +# if(!isRares) +# speciesListHull=speciesList=c(list.files(file.path(allBaseDirs$speciesDir,'Samples10'),full.names=T)) +future=T + +doneHull=checkSDMDone(allBaseDirs2,speciesList=speciesListHull, algorithm='RangeBag',checkFigs=F) # check whether any already done +str(doneHull) +speciesListHull=doneHull$notRun + +# to run species that failed as ppm +# # speciesList=list.files(file.path(allBaseDirs$speciesDir,'Samples10'), full.names=T) +# # done=tools::file_path_sans_ext(basename(gsub('__Maps__v1','', list.files(allBaseDirs$algorithmDirs$PPM$figureDir, full.names=TRUE)))) +# # keep=!tools::file_path_sans_ext(basename(speciesList)) %in% done +# # #done=checkSDMDone(allBaseDirs,speciesList,algorithm='PPM') +# # #str(done) +speciesList=c(list.files(file.path(allBaseDirs$speciesDir,'SDM'), full.names=T),list.files(file.path(allBaseDirs$speciesDir,'SDM10_20'), full.names=T)) +done=tools::file_path_sans_ext(basename(gsub('__Maps__v1','', list.files(allBaseDirs$algorithmDirs$PPM$figureDir, full.names=TRUE)))) +keep=!tools::file_path_sans_ext(basename(speciesList)) %in% done +speciesListHull=speciesList[keep] + + +#Settings +csrb=commonRangeBaggingSettings(myprj,world.shp,runName=runName) +toDo=csrb$toDo +toSave=csrb$toSave +toOverwrite=csrb$toOverwrite + +if(future){ + toDo$transfer$otherEnvProjections=TRUE + toSave$shapeFileTransfer=TRUE + toDo$transfer$whichFutures='all' + toDo$plotting$plotOtherEnvPred=TRUE + toDo$plotting$whichFuturesToPlot=c('ip') + toDo$plotting$nPlotCol=4 +} +if(!future) toDo$plotting$nPlotCol=2 + +toDo$domain$domainTrimMethod='ecoregionRaster' +toDo$fitting$maxTime=40 +toDo$misc$bienMetadata=FALSE +toDo$domain$coarseCrop=FALSE +toDo$domain$fixWeirdDisjunctEcoregions=FALSE +toDo$trimDomainMask=FALSE +toDo$background$maskEvalBGByBufferedPresences=FALSE +toDo$projection$maskToOccEcoNeighbors=FALSE +toDo$evaluation$evaluationModel='fullDomain' + +if(!isRares){ + toDo$presences$removeSpatialOutliers=TRUE + toDo$presences$removeEnvOutliers=TRUE +} + +# write out run setting used for metadata +runSettingsRB=list(toDo,toSave,toOverwrite) +save(runSettingsRB,file=paste0(dirname(allBaseDirs2$envDir),'/runSettingsRangeBag.rdata')) + + +# Run Models +j=2; (speciesCSV=speciesListHull[j]); +mclapply(1:length(speciesListHull), function(j){ #length(speciesListHull) + print(j) + out=sdmRangeBag(speciesCSV=speciesListHull[j], + allBaseDirs2, + toDo=toDo, + toSave=toSave, + toOverwrite=toOverwrite) + if(class(out)=='try-error') print(out) + gc() +},mc.cores=mc.cores) + + diff --git a/SDM Code/workflowBIEN.r b/SDM Code/workflowBIEN.r new file mode 100644 index 0000000..8c39f79 --- /dev/null +++ b/SDM Code/workflowBIEN.r @@ -0,0 +1,1026 @@ + + +sdmBIEN=function(speciesCSV, + allBaseDirs, + algorithm, + toDo=toDo, + toSave=toSave, + toOverwrite=toOverwrite){ + + out=try({ + + verbose=toDo$misc$verbose + + start.time=proc.time() + species=unlist(strsplit(basename(speciesCSV),'.csv')) + if(verbose>0) print(paste0(species,': Process started at ', round(start.time[3]))) + + # generate random seed to be sure to get the same results for each run + let=strsplit(strsplit(species,'_')[[1]][2],'')[[1]][1:4] + if(all(is.na(let))) let=strsplit(species,'')[[1]][1:4] + seed=as.integer(paste(which(letters %in% let),collapse='')) + set.seed(seed) + + #------------------------------------------------------------------- + ## 1. Important Objects + #------------------------------------------------------------------- + #-- these are the 3 main objects referenced throughout the workflow + #-- status monitors workflow progress and catches errors + + status=.statusDF(species) + + dirs=speciesDirectories(species=species, + allBaseDirs=allBaseDirs, + algorithm=toDo$misc$algorithm, + deletePastModels=toDo$misc$deletePastModels, + writeSDPred=toSave$SDPred, + whichFutures=toDo$transfer$whichFutures, + saveModelObj=toSave$modelObj) + if(class(dirs)=='try-error') { + outMessage=(paste0(species,': dirs are messed up')) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + #-- set up temp raster path to delete later + rasterOptions(tmpdir=dirs$temp.path) + + #-- specify which models to fit + # this is for old compatibility, might be smarter to integrate this with stats storage + modelSettings=list(samplingSettings=toDo$sampling$samplingSettings, + expertSettings=toDo$fitting$expertSettings, + predictorSettings=toDo$fitting$predictorSettings, + algorithmSettings= toDo$fitting$algorithmSettings, + formulaMaker=toDo$fitting$formulaMaker) + #modelSpecs(modelSettings) + stats=.statsStorage(species,modelSettings,type=toDo$misc$algorithm) + stats$uniqueID=paste0(species,'__',stats$modelNames,'__', toDo$misc$runName) + # add presence and background sample sizes + + #------------------------------------------------------------------- + ## 2. Environment + #------------------------------------------------------------------- + + ### 2.a read/clean data + #------------------------------------------------------------------- + envFile=list.files(dirs$envDir,'.tif',full.names=T) + env=suppressWarnings(stack(envFile)) + + #### 2.a.i different layers for pres and sampling + #-- none yet + + ### 2.b preselect layers + #------------------------------------------------------------------- + layerNames=try(read.csv(list.files(dirs$envDir,'.csv', full.names=T),header=FALSE, stringsAsFactors=FALSE)) + if(!class(layerNames)=='try-error'){ + best.var=layerNames[,1] #names(env) + names(env)=best.var + } else { + print('add a file with your layer names called layerNames.csv in the Env folder or bad shit could happen') + best.var=names(env) + } + + #------------------------------------------------------------------- + ## 3. Occurrence + #------------------------------------------------------------------- + + ### 3.a read/clean data and optionally thin + #------------------------------------------------------------------- + prog.points=cleanOcc(speciesCSV=speciesCSV, + env=env, + doClean=FALSE, + writeResult=FALSE, + dirs=dirs) + + status$prog.points=.statusTracker(status$prog.points,prog.points, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.points==TRUE) { + outMessage=paste0(species,": Skipping; cleaning occurrences failed with:",status$prog.points) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + pres=prog.points$pres + # do the extract once at beginning; this is duplicated below; doesn't hurt anything but should be cleand up + pres@data=cbind(pres@data,raster::extract(env,pres,ID=FALSE)) + keep=complete.cases(pres@data) + pres=pres[keep,] + if(length(pres) < toDo$presences$minSampleSize){ + outMessage=paste0(species,": Skipping; after extracting env at presences reduced sample size to ",length(pres) ," which is less than threshold you set of ", toDo$presences$minSampleSize) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + # !! try rgeos::gCentroid + if(any(toDo$presences$removeSpatialOutliers, toDo$presences$removeEnvOutliers)){ + prog.outliers=findOutlyingPoints(pres, + spOutliers= toDo$presences$removeSpatialOutliers, + envOutliers= toDo$presences$removeEnvOutliers, + bestVar=best.var, + doPlot=T, + plotFile=paste0(dirs$sp.diag.path, '/', species,'__OccurrenceOutliers.png'), + pval=toDo$presences$pvalSet, + env=env, + species=species) + stats$n.spatial.outliers=length(prog.outliers$spatialToss) + stats$n.env.outliers=length(prog.outliers$envToss) # record number tossed + pres=prog.outliers$pres + keep=complete.cases(pres@data) + pres=pres[keep,] + # if we tossed any, reassign folds to be sure we didn't toss an entire fold + if(any(keep==FALSE)) pres=spatialStratify(pres,stats) + } + + if(length(pres) < toDo$presences$minSampleSize) { + outMessage=paste0(species,": Skipping; after extracting env at presences reduced sample size to less than ", toDo$presences$minSampleSize) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + stats$n.pres=length(pres) + status$has.points=ifelse(nrow(pres)>0,TRUE, FALSE) + status$n.points=nrow(pres) + status$prog.outliers=.statusTracker(status$prog.outliers,prog.outliers, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.outliers==TRUE) { + outMessage=paste0(species,": Skipping; cleaning outliers failed with: ",status$prog.outliers) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + ### 3.b filter/prep occurrences + #------------------------------------------------------------------- + #== Stratify 5 folds for CV + pres=spatialStratify(pres,stats) + + status$prog.cv=.statusTracker(status$prog.cv,pres,toSave=toSave, toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.cv==TRUE) return(status) + if(length(pres)==1) status$do.cv=FALSE + if(length(pres)>1) status$do.cv=TRUE + + if(verbose>2) print(paste0(species,': ',length(pres),' presence points')) + + #------------------------------------------------------------------- + ## 4. Sampling + #------------------------------------------------------------------- + + ### 4.a read/clean data + #------------------------------------------------------------------- + #== define which species contribute to sampling + biased.bg=biasedBackground(dirs=dirs, + env=env, + pointsProj=toDo$presences$pointsProj, + overwrite=toOverwrite$biasedBackground, + writeResult=toSave$biasedBackground, + customBG=toDo$background$customBG, + makeBiasedBackground= toDo$background$makeBiasedBackground) + if(is.null(biased.bg) & toDo$background$makeBiasedBackground){ + warning(paste0(species,": I couldn't make the target group background so I'm turning those settings off and continuing with the models ignoring sampling bias")) + toDo$background$makeBiasedBackground=F + toDo$background$customBG=T + toss=which(stats$samplingSet=='targetBG') + stats=stats[-toss,] + } + status$prog.bias=.statusTracker(status$prog.bias,biased.bg, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.bias==TRUE) { + outMessage=paste0(species,": Skipping; biasedBackground failed with:", status$prog.bias) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + ### 4.b determine modeling domain + #------------------------------------------------------------------- + env=makeDomain(dirs=dirs, + biased.bg=biased.bg, + env=env, + pres=pres, + method=toDo$domain$domainTrimMethod, + writeResult=toSave$domain, + overwrite=toOverwrite$domain, + overwriteBiasMask=toOverwrite$biasMask, + trimBufferkm=toDo$domain$trimBufferkm, + maxTime=toDo$domain$maxTrimDomainTime, + domainClumpDist=toDo$domain$domainClumpDist, + coarseCrop=toDo$domain$coarseCrop, + fixWeirdDisjunctEcoregions= toDo$domain$fixWeirdDisjunctEcoregions, + gdalCompress=toDo$misc$gdalCompress, + saveMasks=FALSE) + if(class(env)=='try-error') { + outMessage=paste0(species,': Skipping; could not trim domain. Maybe this species did not fall within an ecoregion, but just guessing.') + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + # this is the mask to be used later for trimming the domain for different apps + if(toDo$trimDomainMask){ + ecoMask=makeDomain(dirs=dirs, + biased.bg=biased.bg, + env=env, + pres=pres, + method='ecoregionAndNeighborsRaster', + writeResult=toSave$domain, + overwrite=toOverwrite$domain, + overwriteBiasMask=toOverwrite$biasMask, + trimBufferkm=toDo$domain$trimBufferkm, + maxTime=toDo$domain$maxTrimDomainTime, + domainClumpDist=toDo$domain$domainClumpDist, + coarseCrop=TRUE, + fixWeirdDisjunctEcoregions= toDo$domain$fixWeirdDisjunctEcoregions, + gdalCompress=toDo$misc$gdalCompress, + saveMasks=T) + env$ecoMask=raster::extend(ecoMask$trim.domain.2,env) # save mask to make bg + } else { + env$ecoMask=1 #hack to avoid changing any later code. leads to useless calculations below but not useless cory time + } + # add plot of correlations? + if(toDo$presences$removeCorrelatedPredictors){ + rcp=removeCorrelatedPredictors(env, + best.var=best.var, + predictorsToKeepNoMatterWhat= + toDo$presences$predictorsToKeepNoMatterWhat, + species) + env=rcp[['env']]; best.var=rcp[['best.var']] + } + + if(toDo$domain$standardizePredictors){ + print(paste0(species,': standardizing predictors')) + env.masked=raster::mask(env,env$ecoMask) + env.means=raster::cellStats(env.masked[[best.var]],'mean') + env.sd=raster::cellStats(env.masked[[best.var]],'sd') + out=raster::stack(lapply(seq_along(best.var),function(i) calc(env[[best.var[i]]],function(x) (x-env.means[best.var[i]])/env.sd[best.var[i]]))) + ln=names(env) + env=raster::stack(out,env[[which(!names(env)%in%best.var)]]) + names(env)=ln + #write these out in case we want to project somewhere else + stats$envMeansOverDomain=rjson::toJSON(env.means) + stats$envSDOverDomain=rjson::toJSON(env.sd) + rm(out) + } + + stats$predictors.kept=rjson::toJSON(best.var) + stats$predictors.tossed=rjson::toJSON(rcp[['tossed']]) + + status$prog.domain=.statusTracker(status$prog.domain,env, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.domain==TRUE) { + outMessage=paste0(species,": Skipping; domain failed with: ", status$prog.domain) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + if(verbose>0) print(paste0(species,': done trimming domain')) + + ### 4.d sample background + #------------------------------------------------------------------- + #== based on the trimmed domain + prog.bg=makeBackground(env=env.masked, # borrow this from above + dirs=dirs, + stats=stats, + biased.bg=biased.bg, + maxBGFit=toDo$background$maxBGFit, + writeResult=toSave$background, + overwrite=toOverwrite$background) + if(class(prog.bg$bg)=="character"){ + if(prog.bg$bg=='noData'){ + outMessage=paste0(species, ': bailing because <10 background found ') + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + } + status$prog.bg=.statusTracker(status$prog.bg,prog.bg, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.bg==TRUE) { + outMessage=paste0(species,": Skipping; background failed with: ", status$prog.bg) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + #-- make separate background for evaluation + if(toDo$background$maskEvalBGByBufferedPresences){ + #-- rasterize points, buffer them by 25km, sample 100x more than pres (or 10k, whichever is less) + pres.r=pres %>% SpatialPoints %>% raster::rasterize(env,field=rep(1,length(pres))) + # plot(pres.r); points(pres) + pres.rb=raster::buffer(pres.r,width=2e4) + if( sum(values(pres.r),na.rm=T) == sum(values(pres.rb),na.rm=T) ){ + stop(paste0(species,': buffering presences didnt work')) + } + presMask=suppressWarnings(raster(pres.rb)) + values(presMask)=1 + values(presMask)[values(pres.rb)==1]=NA + env.masked2=raster::mask(env.masked,presMask) + maxBGEval=min(1e4,toDo$background$maxBGEvalMultiplier*length(pres)) + prog.pres.masked.bg=makeBackground(env=env.masked2, + dirs=dirs, + stats=stats, + biased.bg=biased.bg, + maxBGFit=maxBGEval, + writeResult=toSave$background, + overwrite=toOverwrite$background) + } else {bg.eval=prog.bg$bg} + #------------------------------------------------------------------- + ### 5. Expert/Prior + #------------------------------------------------------------------- + + ### 5.a read/clean data + #------------------------------------------------------------------- + #prog.prior=uniformPrior(env,stats) + fx_priors=env[[1]] + fx_not_nas=!is.na(fx_priors) + values(fx_priors)[values(fx_not_nas)]=1/sum(values(fx_not_nas), na.rm=T) + # center priors to avoid maxnet issues. + fx_priors=log(fx_priors) + fx_prior.means=try(raster::cellStats(fx_priors,mean)) # error from large rasters? + if(class(fx_prior.means)=='try-error'){ + warning('stop - the uniform prior does not fit in memory to use with cellStats') + } + fx_prior.means=raster::cellStats(fx_priors,mean) + raster::values(fx_priors)=do.call('cbind',lapply(1:length(fx_prior.means), function(x) raster::values(fx_priors[[x]])+abs(fx_prior.means[x]))) + names(fx_priors)='prior_unif' + stats$prior.file=NULL + prog.prior = list(priors=fx_priors,stats=stats) + + priors=prog.prior$priors + stats=prog.prior$stats + + status$prog.prior=.statusTracker(status$prog.prior,priors, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.prior==TRUE) { + outMessage=paste0(species,": Skipping; prior failed with: ", status$prog.prior) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + status$has.expert=FALSE + status$prog.expert=T + status$prog.prior=T + status$prog.eval.prior=T + status$n.models=nrow(stats) + range.poly=NULL + + if(toDo$misc$rmObjs) rm(prog.prior) + status$time.input.prep=round((proc.time() - start.time)[3]) + if(verbose>0) print(paste0(species,': done input prep in ',status$time.input.prep,'s')) + + # do extract with all the layers created. these objects should all be completely homologous except for folds in pres + env2=raster::stack(env,priors) + bg=SpatialPointsDataFrame(prog.bg$bg, data.frame(raster::extract(env2,prog.bg$bg),prog.bg$bg@data[,c('sp','equalWeights')])) + if(toDo$background$maskEvalBGByBufferedPresences){ + bg.eval=SpatialPointsDataFrame(prog.pres.masked.bg$bg, data.frame(raster::extract(env2,prog.pres.masked.bg$bg),prog.pres.masked.bg$bg@data[,c('sp','equalWeights')])) + } else {bg.eval=bg} + if(!is.null(biased.bg)){ + biased.bg=SpatialPointsDataFrame(prog.bg$biased.bg, data.frame(raster::extract(env2,prog.bg$biased.bg),prog.bg$biased.bg@data[,c('sp','equalWeights')])) + } + + pres=SpatialPointsDataFrame(coordinates(pres), data.frame(raster::extract(env2,pres), pres@data[,c('sp','equalWeights','folds')])) + # in case presences fall outside the environmental rasters (giving NA env values) + keep=complete.cases(pres@data) + pres=pres[keep,] + # if we tossed any, reassign folds to be sure we didn't toss an entire fold + if(any(keep==FALSE)) pres=spatialStratify(pres,stats) + if(length(pres) < toDo$presences$minSampleSize){ + outMessage=paste0(species,": Skipping; after extracting env at presences reduced sample size to less than ", toDo$presences$minSampleSize) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + if(toDo$misc$rmObjs) rm(prog.bg) + if(toDo$misc$rmObjs) rm(env2) + + #------------------------------------------------------------------- + ## 6. SDM + #------------------------------------------------------------------- + + ### 6.a full model + #------------------------------------------------------------------- + full.mods=NULL + + ### 6.b. cross validation + #------------------------------------------------------------------- + + if(verbose>0) print(paste0(species,': fitting cv models')) + + cv.mods=try({ + cv.mods.k=vector('list',length(unique(pres$folds))) + for(k in sort(unique(pres$folds))){ + if(verbose>3) print(paste0(species,': fold ', k,':')) + # need at least 3 fitted models to continue, so bail early if that's not going to happen so we're not waiting for slow models we won't use anyhow + if(k>1){ + cond=(sum(sapply(cv.mods.k[1:(k-1)],function(x) is.null(x)))) + if(cond>2){ + outMessage=paste0(species,': 3 models already failed (maybe due to exceeding time limit) so I wont even bother to keep trying the others') + message(outMessage) + return(outMessage) + } + } + model=sdmPPMOffset.cv(dirs=dirs, + pres.fit=pres, + k=k, + bg=bg, + biased.bg=biased.bg, + stats=stats, + maskName=toDo$fitting$maskName, + formulaMaker=toDo$fitting$formulaMaker, + maxTime=toDo$fitting$maxTime, + best.var=best.var, + saveModelObj=toSave$modelObj, + evalFork=toDo$fitting$evalFork, + standardize=!toDo$domain$standardizePredictors, + quadraticUpperLimit0= toDo$fitting$quadraticUpperLimit0, + lambdaSeq=toDo$fitting$lambdaSeq, + minPresences= ceiling(.6*toDo$presences$minSampleSize)) + # retrofitting for consistency; would be better if this were the output above + if(is.null(model)) {model=NA; status$some.models.missing=TRUE + } else if(model[[1]]$n.features.lambda.1se==0) { + model=NA; status$some.models.missing=TRUE + } + cv.mods.k[[k]]=model + } + cv.mods.k + }) + # add a catch in case predictions are uniform + failed=sapply(cv.mods,function(x){ is.na(x) }) + if(all(failed)){ + outMessage=paste0(species,': Skipping; glmnet ran but no models could be fit') + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + if(sum(!failed)<3){ + outMessage=paste0(species,': Skipping; more than 2 folds failed') + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + status$prog.fit.k=.statusTracker(status$prog.fit.k,cv.mods, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.fit.k==TRUE) { + outMessage=paste0(species,": Skipping; fitting failed ", status$prog.fit.k) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + #cv.mods=prog.fit.k + #if(toDo$misc$rmObjs) rm(prog.fit.k) + + #-- store metrics in stats (combine metrics for each fold) + # might be nice to use json some day... + n.per.fold=pres$folds %>% table + stats$eff.n.pres.fit=n.per.fold %>% mean %>% round + stats$n.pres.by.fold=rjson::toJSON(n.per.fold %>% unname) + stats$used.lambda.min.by.fold=rjson::toJSON(sapply(cv.mods,function(x) ifelse(is.na(x),NA,x[[1]]$used.lambda.min))) + # only working for 1 model.extract + stats$model.fit.ok=rjson::toJSON(which(sapply(cv.mods,function(x){ any(class(x[[1]])=='cv.glmnet')}))) + # TODO: probably doesnt work for multiple models + stats$n.features.by.fold= rjson::toJSON(.getFromNestedLists(cv.mods,'n.features.lambda.1se') %>% unname) + stats$formula.fit.attempt.by.fold= rjson::toJSON(.getFromNestedLists(cv.mods,'fitAttempt')%>% unname) + #-- plot regularization curves (combine metrics for each fold) + if(toDo$plotting$regCurve){ + plot.f=paste0(dirs$sp.diag.path,'/',species,'_RegPath.pdf') + pdf(plot.f,w=4*nrow(stats),h=10) + par(mfrow=c(5,nrow(stats)),mar=c(3,6,3,1)) + lapply(seq_along(cv.mods),function(x){ lapply(seq_along(cv.mods[[x]]),function(y) { + if(any(class(cv.mods[[x]][[y]])=='cv.glmnet')) { + plot(cv.mods[[x]][[y]],cex.lab=1,cex.axis=1) + mtext(paste0('fold ',x,' model ',y),3,line=-2) + }}) + }) + dev.off() # ;system(paste0('open ',plot.f)) + } + + status$time.fit=round((proc.time() - start.time)[3] - status$time.input.prep) + if(verbose>0) print(paste0(species,': done fitting cv models in ',status$time.fit,'s')) + + #------------------------------------------------------------------- + ## 7. Projection + #------------------------------------------------------------------- + + if(verbose>0) print(paste0(species,': projecting ...')) + + ### 7.b. Project k-fold CV + #------------------------------------------------------------------- + prog.proj.k=try({ + # create a mask that prevent predictions beyond N sd of the training data + # subset just the best variables used in modeling for speed + # don't normalize the folds - leave the predictions comparable and take their weighted avg for ensemble. then normalize, and record the normalization constant (wait to normalize so i only have 1 constant to rescale the threshold by). thresholds will be estimated, and to threshold future maps, multiply the threshold by the normalization constant to get future binary maps + env.new=env[[best.var]] + extrapLimits=NULL + if(!is.null(toDo$projection$extrapolationRule)){ + if(toDo$projection$extrapolationRule=='trainingPresSD'){ + pres.dat=raster::extract(env.new,pres) + e.mask=extrapolationMask(env.new, + dat=pres.dat, + rule='sd', + val=toDo$projection$extrapolationValue) + extrapLimits=e.mask$extrapLims + } + if(toDo$domain$standardizePredictors){ + all.masks=raster::stackApply(e.mask$masks, rep(1,length(best.var)), sum)==length(best.var) + env.new=stack(raster::mask(env.new,all.masks,maskvalue=0), env[[which(!names(env) %in% best.var)]]) + } else { + env.new=stack(e.mask$maskedEnv,env[[which(!names(env) %in% best.var)]]) + } + } + if(toDo$projection$projectPresentInOnlyUsedEcoregions){ + + if(!any(na.omit(values(env$trim.domain))==2)){ print("Note - you chose toDo$projection$projectPresentInOnlyUsedEcoregions=TRUE to avoid projecting into unoccupied (adjacent) ecoregions in with the fitting data (present) but you did not choose a domain that included adjacent ecoregions. So you're good, but just wanted to let you know that there aren't any adjacent ecoregions in your domain.") + } else { + myMask=env$trim.domain + myMask[myMask==2]=NA + env.new=raster::mask(env.new,myMask) + if(toDo$misc$rmObjs) rm(myMask) + } + } + + cv.stats=vector('list',length(unique(pres$folds))) + # TODO: might not work for multiple models + + for(k in sort(unique(pres$folds))){ + if(any(unlist(lapply(cv.mods[[k]],class))=='cv.glmnet')){ + tmp.proj=projectSDM(dirs=dirs, + mod.out=cv.mods[[k]], + stats=stats, + projEnv=env.new, + priors=priors, + name=paste0(stats$species[1],'__fold',k), + transfer=NULL, + best.var=best.var, + fileNameSuffix='_fullDomain', + gdalCompress=toDo$misc$gdalCompress) + tmp.proj$stats$fold=k + cv.stats[[k]]=tmp.proj$stats + } + } + list(cv.stats=cv.stats) + }) + + status$prog.proj.k=.statusTracker(status$prog.proj.k,cv.mods, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.proj.k==TRUE) { + outMessage=paste0(species,": Skipping; projection failed with: ", status$prog.cv.stats) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + stats.cv=prog.proj.k$cv.stats + stats$normalizationConstant=rjson::toJSON(sapply(stats.cv,function(x) ifelse(is.null(x),NA,x$normalizationConstant))) + + + status$time.project=round((proc.time() - start.time)[3]- status$time.input.prep - status$time.fit) + if(verbose>0) print(paste0(species,': ','done projecting ')) + if(verbose>0) print(paste0(species,': ',"time: ",status$time.project)) + + ### 7.d. Mask to relevant ecoregions + #------------------------------------------------------------------- + # basically we won't use the continuous predictions based on buffered points for anything; they'll just hang around in case we need them in the future. + + fullDomainFiles=list.files(dirs$sp.pred.path,pattern='fold',full.names=T) + #-- by convention, the first layer is the ecoregion one + if(toDo$projection$maskToOccEcoNeighbors){ + myMasks=raster::stack(list.files(dirs$sp.mask.path,full.names=T))[[1]] + occEcoNeighFiles=sub('_fullDomain.tif','_occEcoNeigh.tif', fullDomainFiles,fixed=T) + #-- occupied and neighbors + thisMask=raster::reclassify(myMasks,c(.99,2.01,1)) + a=lapply(seq_along(fullDomainFiles),function(x){ + fold.r=suppressWarnings(raster(fullDomainFiles[x])) + fold.r2=raster::crop(fold.r,thisMask) + fold.r3=raster::mask(fold.r2,thisMask) + .writeRasterCM(fold.r3,occEcoNeighFiles[x],toDo$misc$gdalCompress) + }) + } + #-- occupied only + if(toDo$trimDomainMask){ + myMasks=raster::stack(list.files(dirs$sp.mask.path,full.names=T))[[1]] + occEcoFiles=sub('_fullDomain.tif','_occEco.tif',fullDomainFiles,fixed=T) + thisMask=raster::reclassify(myMasks,c(1.01,Inf,NA)) + a=lapply(seq_along(fullDomainFiles),function(x){ + fold.r=suppressWarnings(raster(fullDomainFiles[x])) + fold.r2=raster::crop(fold.r,thisMask) + fold.r3=raster::mask(fold.r2,thisMask) + .writeRasterCM(fold.r3,occEcoFiles[x],toDo$misc$gdalCompress) + }) + } + + #------------------------------------------------------------------- + ## 8. Evaluate + #------------------------------------------------------------------- + + if(verbose>0) print(paste0(species,': evaluating ...')) + + ### 8.a Evaluate continuous predictions + #------------------------------------------------------------------- + # Note- this just evaluates the models masked to occupied ecoregions + prog.eval.cont=evaluateContFullCV(pres=pres, + bg1=bg.eval, + biased.bg=biased.bg, + dirs=dirs, + stats=stats, + full.mods=full.mods, + maxBGEval=toDo$background$maxBGEval, + whichModel='mean', + modelNameGrep= toDo$evaluation$evaluationModel, + NATo0=FALSE) + status$prog.eval.cont=.statusTracker(status$prog.eval.cont, prog.eval.cont, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.eval.cont==TRUE){ + outMessage=paste0(species,": Skipping; continuous evaluations failed with: ", status$prog.eval.cont) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + stats=prog.eval.cont$stats + + ### 8.b Evaluate binary predictions + #------------------------------------------------------------------- + # consider selecting absences for eval smarter and then using SPS + # buffer around presences to select background. distances equals x km/max.dist.between pts + prog.eval.bin=evaluateBinFullCV(pres=pres, + bg=bg.eval, + dirs=dirs, + biased.bg=biased.bg, + stats=stats, + thresholds=toDo$threshold$thresholds, + modelNameGrep= toDo$evaluation$evaluationModel) + stats=prog.eval.bin + status$prog.eval.bin=.statusTracker(status$prog.eval.bin,stats,toDo) + if(!status$prog.eval.bin==TRUE) { + outMessage=paste0(species,": Skipping; binary evaluations failed with: ", status$prog.eval.bin) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + ### 8.c Find best models + #------------------------------------------------------------------- + stats=bestModelIndicator(stats=stats) + status$prog.find.best=.statusTracker(status$prog.find.best,stats, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.find.best==TRUE) { + outMessage=paste0(species,": Skipping; finding best models failed with: ", status$prog.avg) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + status$time.eval=round((proc.time() - start.time)[3]- status$time.input.prep - status$time.fit- status$time.project) + if(verbose>0) print(paste0(species,': done evaluating')) + if(verbose>2) print(paste0(species,': time: ',status$time.eval)) + + ### 8.d Summarize k-fold CV weighting by performances + #------------------------------------------------------------------- + + wtrans=function(y){ + y[y<0.5]=NA # need to toss models where auc < .5 so low AUC doesnt lead to huge weights + w=(y-.5)^2; w/sum(w,na.rm=T)} + w1=suppressWarnings(stats$cv.byfold.test.auc %>% as.character %>% fromJSON %>% unlist %>% as.numeric %>% wtrans %>% replace_na(0)) #weights + stats$AUCWeighted.test.AUC=suppressWarnings(stats$cv.byfold.test.auc %>% as.character %>% fromJSON %>% unlist %>% as.numeric %>% weighted.mean(w1) %>% round(3)) + w2=suppressWarnings(stats$cv.byfold.test.pAUC.sens10.8%>% as.character %>% fromJSON %>% unlist %>% as.numeric %>% wtrans %>% replace_na(0)) + stats$pAUCWeighted.test.pAUC= suppressWarnings(stats$cv.byfold.test.pAUC.sens10.8%>% as.character %>% fromJSON %>% unlist %>% as.numeric %>% weighted.mean(w2) %>% round(3)) + if(sum(w2>0)<3){ + outMessage=paste0(species,": Skipping; only 1 or 2 folds has a useful model, which kinda means you don't have a useful model ", status$prog.avg) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + # need to add the domain to the file name; think we just have occEcoNeigh + if(!toDo$evaluation$evaluationModel=='fullDomain'){ + prog.summarize.cv=summarizeKFoldFast(dirs=dirs, + stats=stats, + writeSDPred=FALSE, # not implemented for weights + algorithm=toDo$misc$algorithm, + gdalCompress=toDo$misc$gdalCompress, + ensembleWeights= toDo$projection$ensembleWeights, + modelNameGrep= toDo$evaluation$evaluationModel, + transfer=NULL) + } + #-- could also summarize only this one and mask to other domains but less code + prog.summarize.cv2=summarizeKFoldFast(dirs=dirs, + stats=stats, + writeSDPred=FALSE, # not implemented for weights + algorithm=toDo$misc$algorithm, + gdalCompress=toDo$misc$gdalCompress, + ensembleWeights= toDo$projection$ensembleWeights, + modelNameGrep='fullDomain', + transfer=NULL) + + status$prog.avg=.statusTracker(status$prog.avg,stats, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.avg==TRUE) { + outMessage=paste0(species,": Skipping; summarizing models fast failed with: ", status$prog.avg) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + status$time.project=round((proc.time() - start.time)[3]- status$time.input.prep - status$time.fit) + if(verbose>0) print(paste0(species,': done projecting ')) + if(verbose>0) print(paste0(species,': time: ',status$time.project)) + + #------------------------------------------------------------------- + ## 9. Transfer model to new conditions + #------------------------------------------------------------------- + ### 9.a. Project CV models to full domain + #------------------------------------------------------------------- + prog.transfer=NULL + if(toDo$transfer$otherEnvProjections){ + if(verbose>0) print(paste0(species,': transferring ... ')) + # environment for transfer + env.new=NULL + prog.transfer=try({ + new.envs=list.files(dirs$otherEnvDir,full.names=T,pattern='tif') + if(all(toDo$transfer$whichFutures=='all')){ + keep=seq_along(basename(new.envs)) + } else { + keep=c(mapply(function(x) grep(x,basename(new.envs)),toDo$transfer$whichFutures)) + } + new.envs=new.envs[keep] + + for(ii in 1:length(new.envs)){ + print(paste0(species,': ',' projecting ',tools::file_path_sans_ext(basename(new.envs[ii])))) + env.new=raster::stack(new.envs[ii]) + prepped=.prepTransferEnv(env.new=env.new,env=env, env.means=env.means, env.sd=env.sd, priors=priors,toDo=toDo,best.var=best.var,pres=pres, dirs=dirs) + env.new=prepped$env.new + priors=prepped$priors + + for(k in sort(unique(pres$folds))){ + transfer=tools::file_path_sans_ext(basename(new.envs[ii])) + if(any(unlist(lapply(cv.mods[[k]],class))=='cv.glmnet')){ # to avoid projecting when a model errors + tmp.proj=projectSDM(dirs=dirs, + mod.out=cv.mods[[k]], + stats=stats, + projEnv=env.new, + priors=priors, + name=paste0('fold',k), + best.var=best.var, + transfer=transfer, + algorithm=toDo$misc$algorithm, + fileNameSuffix='_fullDomain', + verbose=1) + } + } # folds loop + } # env loop + }) # try + } + + status$prog.transfer=.statusTracker(status$prog.transfer,stats, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(toDo$misc$rmObjs) rm(env.new) + if(!status$prog.transfer==TRUE){ + outMessage=paste0(species,": Skipping; transfer failed with: ", status$prog.transfer) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + ### 9.b. Summarize k-fold CV for transfer + #------------------------------------------------------------------- + prog.transfer.avg=NULL + if(toDo$transfer$otherEnvProjections){ + prog.transfer.avg=try({ + for(ii in 1:length(new.envs)){ + transfer=tools::file_path_sans_ext(basename(new.envs[ii])) + summarizeKFoldFast(dirs=dirs,stats=stats,writeSDPred=FALSE, + transfer=transfer, + algorithm=toDo$misc$algorithm, + gdalCompress=toDo$misc$gdalCompress, + ensembleWeights=toDo$projection$ensembleWeights, + modelNameGrep='fullDomain') + } + }) # try + } # do transfer + + status$prog.transfer.avg=.statusTracker(status$prog.avg,stats, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.transfer.avg==TRUE) { + outMessage=paste0(species,": Skipping; summarizing transfer models failed with: ", status$prog.transfer.avg) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + + status$time.transfer=round((proc.time() - start.time)[3]- status$time.input.prep - status$time.fit- status$time.project- status$time.eval) + if(verbose>0) print(paste0(species,': done transferring')) + if(verbose>2) print(paste0(species,': time: ',status$time.transfer)) + + #------------------------------------------------------------------- + ## 10. Threshold predictions + #------------------------------------------------------------------- + ### 10.a. under fitting conditions + #------------------------------------------------------------------- + ensemble.tr=lapply(toDo$threshold$thresholds, function(x){ + out=evaluateBin(pres=pres,bg=bg.eval,biased.bg=biased.bg, + modelPath=intersect(list.files(dirs$sp.pred.path, + pattern='pAUCWeighted',full.names=T), + list.files(dirs$sp.pred.path, + pattern=toDo$evaluation$evaluationModel,full.names=T)), + tr='TP',presQuantile=as.numeric(gsub('TP','.',x)), + NATo0=TRUE)[-(1:3)] + names(out)=paste0('pAUCWeighted.',x,'.',names(out)) + out + }) + for(x in seq_along(ensemble.tr)){ + a=ensemble.tr[[x]] + a[-grep('threshold',names(a))]=round(a[-grep('threshold',names(a))],3) + stats[,colnames(ensemble.tr[[x]])]=a + } + + ## do same for continuous + model.r=unrtrans(suppressWarnings(raster(list.files(dirs$sp.pred.path, pattern=paste0(toDo$evaluation$evaluationModel,'__pAUCWeighted'), full.names=T)))) + p.ext=raster::extract(model.r,pres) + a.ext=raster::extract(model.r,bg.eval) + + out=evaluateCont(p=p.ext,a=a.ext, biased.bg=biased.bg, + modelRaster=model.r,partial.auc=list(c(1, .80)), partial.auc.focus=list("sens")) + names(out)=paste0('pAUCWeighted.',names(out)) + stats[,names(out)]=round(out,3) + + #-- these are applied to average predictions rather than each fold. + #-- only do for the 1 weighted ensemble + # this should applies to the full domain ONLY, because we apply masking below + if(toDo$threshold$thresholding){ + if(verbose>0) print(paste0(species,': thresholding ... ')) + prog.threshold=threshold(dirs=dirs, + stats=stats, + pres=pres, + env=env, + saveShapeFile=toSave$shapeFile, + makeThresholdMaps=toDo$threshold$thresholds, + makeSparse=toDo$threshold$makeSparseMatrix, + modelNameGrep='fullDomain', + gdalCompress=toDo$misc$gdalCompress) + + status$prog.threshold= .statusTracker(status$prog.threshold,stats, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.threshold==TRUE) { + outMessage=paste0(species,": Skipping; thresholding failed with: ", status$prog.threshold) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + } + ### 10.b. threshold transferred models + + #------------------------------------------------------------------- + if(!all(is.null(stats.cv))){ + if(toDo$transfer$otherEnvProjections & toDo$threshold$thresholding){ + if(verbose>0) print(paste0(species,': thresholding transferred models ... ')) + prog.threshold.new=try({ + new.envs=list.files(dirs$otherEnvDir,full.names=T,pattern='tif') + if(all(toDo$transfer$whichFutures=='all')){ + keep=seq_along(basename(new.envs)) + } else { + keep=c(mapply(function(x) grep(x,basename(new.envs)),toDo$transfer$whichFutures)) + } + new.envs=new.envs[keep] + for(ii in 1:length(new.envs)){ + envName=tools::file_path_sans_ext(basename(new.envs[ii])) + if(verbose>0) print(paste0(species,': thresholding transferred models - ',envName)) + tmp=threshold(dirs=dirs, + stats=stats, + pres=pres, + env=env, + saveShapeFile=toSave$shapeFileTransfer, + whichModel='mean', + transfer=envName, + modelNameGrep='fullDomain', + makeThresholdMaps=toDo$threshold$makeThresholdMaps, + makeSparse=toDo$threshold$makeSparseMatrix, + gdalCompress=toDo$misc$gdalCompress) + } + }) + } + + status$prog.threshold.new=.statusTracker(status$prog.threshold.new, stats,toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.threshold.new==TRUE) { + outMessage=paste0(species,": Skipping; thresholding transfered models failed with: ", status$prog.thershold.new) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + } + + status$time.threshold=round((proc.time() - start.time)[3]- status$time.input.prep - status$time.fit- status$time.project- status$time.eval- status$time.transfer) + if(verbose>0) print(paste0(species,': done thresholding')) + if(verbose>2) print(paste0(species,': time: ',status$time.transfer)) + + + ### 10.c. Trinary Maps under fitting conditions + #------------------------------------------------------------------- + # pass toSave to function to save trinary (it should always be made to get upper and lower bounds on range size and pAUC) + if(toDo$threshold$trinaryMap){ + prog.trinary=trinaryMapSDM(dirs, + stats, + env, + pres, + bg1=bg.eval, + openFig=FALSE, + shapesToPlot=toDo$plotting$shapesForPlotting, + doMapPlot=TRUE, + doROCPlot=FALSE, + modelNameGrep1=toDo$evaluation$evaluationModel, + modelNameGrep2='pAUCWeighted', + verbose=5) + + if(!class(prog.trinary)=='try-error') {stats=prog.trinary + } else { + outMessage=paste0(species,": NOT Skipping the species, because its not super important, but trinary maps failed with: ", status$trinary) + message(outMessage) + } + status$prog.trinary=.statusTracker(status$prog.trinary,outObj= prog.trinary, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + #if(!status$prog.trinary==TRUE) return(status) + } + + #------------------------------------------------------------------- + ## 11. Masking predictions + #------------------------------------------------------------------- + #-- the continuous preductions on the fitting data were already masked above to allow evaluation on occupied plus neighboring. I don't think we really need + + #------------------------------------------------------------------- + + ### 11.a. Mask binary maps to occupied (only) and Occ+neighbor ecoregions + #------------------------------------------------------------------- + if(toDo$trimDomainMask){ + myMasks=raster::stack(list.files(dirs$sp.mask.path,full.names=T))[[1]] + thisMask=raster::reclassify(myMasks,c(.99,2.01,1)) + maskBinaryMaps(thisMask,toDo,dirs,newMapName='occEcoNeigh',species) + if(verbose>0) print(paste0(species,': done masking to occEcoNeigh')) + + ### 11.b. Mask binary maps to Occupied ecoregions + #------------------------------------------------------------------- + myMasks=raster::stack(list.files(dirs$sp.mask.path,full.names=T))[[1]] + thisMask=raster::reclassify(myMasks,c(1.01,Inf,NA)) # use occupied only + maskBinaryMaps(thisMask,toDo,dirs,newMapName='occEco',species) + if(verbose>0) print(paste0(species,': done masking to occEco')) + } + + + #------------------------------------------------------------------- + ## 12. Plotting + #------------------------------------------------------------------- + + ### 12.a weighted ensemble maps + #------------------------------------------------------------------- + prog.plot=compareSdmStatsPlot(stats=stats, + dirs=dirs, + range.poly=NULL, + pres=pres, + env=env, + speciesInfo=FALSE, + legendLoc='panel', + logscaleZ=TRUE, + modelNameGrep=toDo$evaluation$evaluationModel, + toDo=toDo) + + status$prog.plot=.statusTracker(status$prog.plot,prog.plot, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + + if(!status$prog.plot==TRUE) { + outMessage=paste0(species,": Skipping; plotting maps failed with: ", status$prog.plot) + message(outMessage) + exitSDMWorkflow(dirs=dirs,stats=stats, toSave=toSave,toDo=toDo,status=status) + return(outMessage) + } + ### 12.b. Make response curves + #------------------------------------------------------------------- + if(toDo$plotting$responseCurves){ + if(verbose>0) print(paste0(species,': plotting response curves ...')) + prog.response.curves=try(responseCurves(dirs, + env, + stats, + bg=bg, + toDo=toDo, + best.var=best.var, + priors=NA, + pres=pres, + envMeans=env.means, + envSDs=env.sd, + extrapLimits=extrapLimits, + openFigs=FALSE)) + status$prog.response.response.curves=.statusTracker( status$prog.prog.response.curves,prog.response.curves, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(class(prog.response.curves)=='try-error') { + outMessage=paste0(species,": NOT Skipping the species because its not a super important issue, but plotting response curves failed with: ", status$prog.response.curves) + message(outMessage) + } + } + + if(verbose>0) print(paste0(species,': done plotting')) + + #------------------------------------------------------------------- + ## 13. Clean up / Record model metadata + #------------------------------------------------------------------- + # TODO set this up for rmm objects + if(toDo$metadata$bienMetadata){ md=makeBIENMetadata(stats=stats,dirs=dirs,toDo=toDo) + sp=statsPublic(stats, statPublicFile=paste0(dirs$algorithmDirs$PPM$statPublicDir, '/',species,'__StatsPub.csv')) + } + #rmm=rmmsBIEN(dirs=dirs,stats=stats,toDo=toDo) + if(verbose>2) print(paste0(species,': writing statistics and cleaning')) + status$time.all=stats$runTime=round((proc.time() - start.time)[3]) + status$finished=TRUE + + ex=exitSDMWorkflow(dirs=dirs,stats=stats,status=status,toSave=toSave, toDo=toDo) + + # clean up tmp files + unlink(rasterOptions(overwrite=T)$tmpdir,recursive=T) + + if(verbose>0) print(paste0(species,': finished model in ', status$time.all)) + if(verbose>2) print(paste0(species,': -------------------------')) + + outMessage=paste0(species,': looks good') + print(outMessage) + return(outMessage) + }) + return(out) +} + diff --git a/SDM Code/workflowRangeBagging.r b/SDM Code/workflowRangeBagging.r new file mode 100644 index 0000000..9218b92 --- /dev/null +++ b/SDM Code/workflowRangeBagging.r @@ -0,0 +1,541 @@ + +sdmRangeBag=function(speciesCSV, + allBaseDirs, + toDo=toDo, + toSave=toSave, + toOverwrite=toOverwrite){ + + out=try({ + + verbose=toDo$misc$verbose + algorithm='RangeBag' + + start.time=proc.time() + species=unlist(strsplit(basename(speciesCSV),'.csv')) + if(verbose>0) print(paste0(species,': ','Process started at ', round(start.time[3]))) + + #-- generate random seed to be sure to get the same results for each run + let=strsplit(strsplit(species,'_')[[1]][2],'')[[1]][1:4] + seed=as.integer(paste(which(letters %in% let),collapse='')) + set.seed(seed) + + #------------------------------------------------------------------- + ## 1. Important Objects + #------------------------------------------------------------------- + #-- these are the 3 main objects referenced throughout the workflow + #-- status monitors workflow progress and catches errors + + status=.statusDF(species) + + dirs=speciesDirectories(species=species, + allBaseDirs=allBaseDirs, + algorithm=toDo$misc$algorithm, + deletePastModels=toDo$misc$deletePastModels, + writeSDPred=toSave$SDPred, + whichFutures=toDo$transfer$whichFutures, + saveModelObj=toSave$modelObj) + if(class(dirs)=='try-error') {print(paste0(species,': ','dirs are messed up')) ; return()} + # set up temp raster path to delete later + rasterOptions(tmpdir=dirs$temp.path) + if(verbose>2) print(paste0(species,': ','directories created ')) + + #-- specify which models to fit + # this is for old compatibility, might be smarter to integrate this with stats storage + modelSettings=list(samplingSettings=toDo$sampling$samplingSettings, + expertSettings=toDo$fitting$expertSettings, + predictorSettings=toDo$fitting$predictorSettings, + algorithmSettings= toDo$fitting$algorithmSettings, + formulaMaker=toDo$fitting$formulaMaker) + + stats=.statsStorage(species,modelSettings,type=algorithm,toDo) + #!! only works for a single model at a time + stats$rbV=toDo$fitting$rbV + stats$rbD=toDo$fitting$rbD + stats$rbP=toDo$fitting$rbP + stats$uniqueID=paste0(species,'__',stats$modelNames,'__', toDo$misc$runName) + #------------------------------------------------------------------- + ## 2. Environment + #------------------------------------------------------------------- + + ### 2.a read/clean data + #------------------------------------------------------------------- + envFile=list.files(dirs$envDir,'.tif',full.names=TRUE) + env=suppressWarnings(stack(envFile)) + + #### 2.a.i different layers for pres and sampling + #-- none yet + + ### 2.b preselect layers + #------------------------------------------------------------------- + layerNames=try(read.csv(list.files(dirs$envDir,'.csv', full.names=TRUE),header=FALSE, stringsAsFactors=FALSE)) + if(!class(layerNames)=='try-error'){ + best.var=layerNames[,1] #names(env) + names(env)=best.var + } else { + print('add a file with your layer names called layerNames.csv in the Env folder or bad shit could happen') + best.var=names(env) + } + + #------------------------------------------------------------------- + ## 3. Occurrence + #------------------------------------------------------------------- + + ### 3.a read/clean data and optionally thin + #------------------------------------------------------------------- + if(verbose>2) print(paste0(species,': ','cleaning occurrences ')) + prog.points=cleanOcc(speciesCSV=speciesCSV, + env=env, + doClean=FALSE, + writeResult=FALSE, + dirs=dirs) + + status$prog.points=.statusTracker(status$prog.points,prog.points, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.points==TRUE) return(status) + pres=prog.points$pres + # do the extract once at beginning + pres@data=cbind(pres@data,raster::extract(env,pres,ID=FALSE)) + if(nrow(unique(pres@data))<3){ + message(paste0(species,': there are less than 3 unique points in environmental space, so skipping')) + return() + } + + if(any(toDo$presences$removeSpatialOutliers, + toDo$presences$removeEnvOutliers)){ + prog.outliers=findOutlyingPoints(pres, + spOutliers= toDo$presences$removeSpatialOutliers, + envOutliers= toDo$presences$removeEnvOutliers, + bestVar=best.var, + doPlot=TRUE, + plotFile=paste0(dirs$sp.diag.path, '/OccurrenceOutliers.png'), + pval=toDo$presences$pvalSet, + env=env, + species=species) + + pres=prog.outliers$pres + #-- record number tossed + stats$n.spatial.outliers=length(prog.outliers$spatialToss) + stats$n.env.outliers=length(prog.outliers$envToss) + } + status$has.points=ifelse(nrow(pres)>0,TRUE, FALSE) + status$n.points=nrow(pres) + stats$n.pres=length(pres) + + # # status$prog.outliers=.statusTracker(status$prog.outliers,prog.outliers, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + # # if(!status$prog.outliers==TRUE) return(status) + + if(verbose>2) print(paste0(species,': ',length(pres),' presence points')) + + #------------------------------------------------------------------- + ## 4. Sampling + #------------------------------------------------------------------- + + if(verbose>0) print(paste0(species,': ','trimming domain....')) + + ### 4.b determine modeling domain + #------------------------------------------------------------------- + #add json string to store which ecoregions were used + env=makeDomain(dirs=dirs, + biased.bg=NULL, + env=env, + pres=pres, + method=toDo$domain$domainTrimMethod, + writeResult=toSave$domain, + overwrite=toOverwrite$domain, + overwriteBiasMask=toOverwrite$biasMask, + trimBufferkm=toDo$domain$trimBufferkm, + maxTime=toDo$domain$domainMaxTime, + domainClumpDist=toDo$domain$domainClumpDist, + coarseCrop=toDo$domain$coarseCrop, + gdalCompress=toDo$misc$gdalCompress) + if(class(env)=='try-error') stop(paste0(species,': could not trim domain, so skipping. Maybe this species did not fall within an ecoregion.') ) + # add plot of correlations and what was tossed! + + #-- mask to be used later for trimming the domain for different apps + if(toDo$trimDomainMask){ + ecoMask=makeDomain(dirs=dirs, + biased.bg=NULL, + env=env, + pres=pres, + method='ecoregionAndNeighborsRaster', + writeResult=toSave$domain, + overwrite=toOverwrite$domain, + overwriteBiasMask=toOverwrite$biasMask, + trimBufferkm=toDo$domain$trimBufferkm, + maxTime=toDo$domain$maxTrimDomainTime, + domainClumpDist=toDo$domain$domainClumpDist, + coarseCrop=TRUE, + fixWeirdDisjunctEcoregions= toDo$domain$fixWeirdDisjunctEcoregions, + gdalCompress=toDo$misc$gdalCompress, + saveMasks=TRUE) + env$ecoMask=raster::extend(ecoMask$trim.domain.2,env) # save mask to make bg + } else { + env$ecoMask=1 #hack to avoid changing any later code. leads to useless calculations below but not useless cory time + } + + if(toDo$presences$removeCorrelatedPredictors){ + rcp=removeCorrelatedPredictors(env, + best.var=best.var, + predictorsToKeepNoMatterWhat= + toDo$presences$predictorsToKeepNoMatterWhat, + species) + env=rcp[['env']]; best.var=rcp[['best.var']] + } + + status$prog.domain=.statusTracker(status$prog.domain,env, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.domain==TRUE) return(status) + if(verbose>0) print(paste0(species,': ','done trimming domain')) + + ### 4.d sample background + #------------------------------------------------------------------- + #-- based on the trimmed domain + # the biased background is going to the wrong folder and woverwriting the unbaised. turned off saving for now. + prog.bg=makeBackground(env=env, + dirs=dirs, + stats=stats, + biased.bg=NULL, + maxBGFit=toDo$background$maxBGFit, + writeResult=toSave$background, + overwrite=toOverwrite$background) + if(class(prog.bg$bg)=="character"){ + if(prog.bg$bg=='noData'){ + print(paste0(species, ': bailing because <10 background found ')) + return(status) + } + } + bg=prog.bg$bg + + status$prog.bg=.statusTracker(status$prog.bg,prog.bg, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.bg==TRUE) return(status) + + #------------------------------------------------------------------- + ## 6. SDM + #------------------------------------------------------------------- + + ### 6.a full model + #------------------------------------------------------------------- + if(verbose>0) print(paste0(species,': ','fitting full model ... ')) + #env=env[[-c(grep('targetBG.mask',names(env)), grep('trim.domain',names(env)))]] + # TODO record number of models failed + prog.fit.full=sdmRangeBag2(dirs, + pres.fit=pres, + k=NULL, + name='full', + stats=stats, + maxTime=toDo$fitting$maxTime, + best.var=best.var, + saveModelObj=toSave$modelObj, + runModel=TRUE, + evalFork=toDo$fitting$evalFork) + if(prog.fit.full$mods=='noData') return(status) + + status$prog.fit.full=.statusTracker(status$prog.fit.full, prog.fit.full,toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.fit.full==TRUE) return(status) + if(is.na(prog.fit.full$mods)) {full.mods=NULL} else { full.mods=prog.fit.full$mods } + if(toDo$fitting$runFullModel) stats=prog.fit.full$stats + if(verbose>0) print(paste0(species,': ','done fitting full model ')) + + #------------------------------------------------------------------- + ## 7. Projection + #------------------------------------------------------------------- + + if(verbose>0) print(paste0(species,': ','projecting ...')) + + ### 7.a. Project full model + #------------------------------------------------------------------- + prog.proj.full=projectSDM(dirs=dirs, + mod.out=full.mods, + stats=stats, + projEnv=env, + name=paste0(stats$species[1],'__full'), + fileNameSuffix='_fullDomain', + best.var=best.var) + status$prog.proj.full=.statusTracker(status$prog.proj.full,stats, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.proj.full==TRUE) return(status) + stats=prog.proj.full$stats + if(toDo$misc$rmObjs) rm(prog.proj.full) + + status$time.project=round((proc.time() - start.time)[3]- status$time.input.prep - status$time.fit) + if(verbose>0) print(paste0(species,': ','done projecting ')) + if(verbose>0) print(paste0(species,': ',"time: ",status$time.project)) + + ### 7.b. Mask continuous preds to relevant ecoregions (occ+neighbors) + #------------------------------------------------------------------- + # basically we won't use the continuous predictions based on buffered points for anything; they'll just hang around in case we need them in the future. + + fullDomainFiles=list.files(dirs$sp.pred.path,pattern='full', full.names=TRUE) + # 10/12/21: i think this can just be plugged in from the ppm function to replace gthe commented stuff below + if(toDo$projection$maskToOccEcoNeighbors){ + myMasks=raster::stack(list.files(dirs$sp.mask.path,full.names=T))[[1]] + occEcoNeighFiles=sub('_fullDomain.tif','_occEcoNeigh.tif', fullDomainFiles,fixed=T) + #-- occupied and neighbors + thisMask=raster::reclassify(myMasks,c(.99,2.01,1)) + a=lapply(seq_along(fullDomainFiles),function(x){ + fold.r=suppressWarnings(raster(fullDomainFiles[x])) + fold.r2=raster::crop(fold.r,thisMask) + fold.r3=raster::mask(fold.r2,thisMask) + .writeRasterCM(fold.r3,occEcoNeighFiles[x],toDo$misc$gdalCompress) + }) + } + #Maybe this should happen below after i've taken the ensemble means. i think i only really need the folds for the fullDomain stored and for occEcoNeigh to evaluate + #-- occupied only + if(toDo$trimDomainMask){ + myMasks=raster::stack(list.files(dirs$sp.mask.path,full.names=T))[[1]] + occEcoFiles=sub('_fullDomain.tif','_occEco.tif',fullDomainFiles,fixed=T) + thisMask=raster::reclassify(myMasks,c(1.01,Inf,NA)) + a=lapply(seq_along(fullDomainFiles),function(x){ + fold.r=suppressWarnings(raster(fullDomainFiles[x])) + fold.r2=raster::crop(fold.r,thisMask) + fold.r3=raster::mask(fold.r2,thisMask) + .writeRasterCM(fold.r3,occEcoFiles[x],toDo$misc$gdalCompress) + }) + } +# occEcoNeighFiles=sub('_fullDomain.tif','_occEcoNeigh.tif', fullDomainFiles,fixed=TRUE) +# +# #-- by convention, the first layer is the ecoregion one +# myMasks=suppressWarnings(raster::stack(list.files( dirs$sp.mask.path,full.names=TRUE)))[[1]] +# #-- occupied and neighbors +# thisMask=raster::reclassify(myMasks,c(.99,2.01,1)) +# lapply(seq_along(fullDomainFiles),function(x){ +# fold.r=raster(fullDomainFiles[x]) +# fold.r2=raster::crop(fold.r,thisMask) +# fold.r3=raster::mask(fold.r2,thisMask) +# .writeRasterCM(fold.r3,occEcoNeighFiles[x],toDo$misc$gdalCompress) +# }) +# #-- occupied only +# occEcoFiles=sub('_fullDomain.tif','_occEco.tif',fullDomainFiles,fixed=TRUE) +# thisMask=raster::reclassify(myMasks,c(1.01,Inf,NA)) +# lapply(seq_along(fullDomainFiles),function(x){ +# fold.r=raster(fullDomainFiles[x]) +# fold.r2=raster::crop(fold.r,thisMask) +# fold.r3=raster::mask(fold.r2,thisMask) +# .writeRasterCM(fold.r3,occEcoFiles[x],toDo$misc$gdalCompress) +# }) + #------------------------------------------------------------------- + ## 8. Evaluate + #------------------------------------------------------------------- + + if(verbose>0) print(paste0(species,': ','evaluating ...')) + + ### 8.a Evaluate continuous predictions + #------------------------------------------------------------------- + prog.eval.cont=evaluateContFullCV(pres=pres, + bg=bg, + dirs=dirs, + stats=stats, + full.mods=full.mods, + maxBGEval=toDo$background$maxBGEval, + whichModel='full', + modelNameGrep= toDo$evaluation$evaluationModel) + + status$prog.eval.cont=.statusTracker(status$prog.eval.cont, prog.eval.cont, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.eval.cont==TRUE) return(status) + stats=prog.eval.cont$stats + + ### 8.b Evaluate binary predictions + #------------------------------------------------------------------- + prog.eval.bin=evaluateBinFullCV(pres=pres, + bg=bg, + dirs=dirs, + stats=stats, + thresholds=toDo$threshold$thresholds, + whichModel='full', + modelNameGrep= toDo$evaluation$evaluationModel) + stats=prog.eval.bin + status$prog.eval.bin=.statusTracker(status$prog.eval.bin,stats,toDo) + if(!status$prog.eval.bin==TRUE) return(status) + + ### 8.c Find best models + #------------------------------------------------------------------- + # CM 3/31/21 - seems to work but not used for anything + # prog.best=bestModelIndicator(stats=stats) + # stats=prog.best + # status$prog.find.best=.statusTracker(status$prog.find.best,stats, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + # if(!status$prog.find.best==TRUE) return(status) + + status$time.eval=round((proc.time() - start.time)[3]- status$time.input.prep - status$time.fit- status$time.project) + if(verbose>0) print(paste0(species,': ','done evaluating')) + if(verbose>2) print(paste0(species,': ',"time: ",status$time.eval)) + + #------------------------------------------------------------------- + ## 9. Transfer model to new conditions + #------------------------------------------------------------------- + ### 9.a. Project CV models + #------------------------------------------------------------------- + #-- this only project to the full domain + prog.transfer=NULL + if(toDo$transfer$otherEnvProjections){ + if(verbose>0) print(paste0(species,': ','transferring ... ')) + # environment for transfer + env.new=NULL + prog.transfer=try({ + new.envs=list.files(dirs$otherEnvDir,full.names=TRUE,pattern='tif') + if(any(toDo$transfer$whichFutures=='all')){ + keep=seq_along(basename(new.envs)) + } else { + keep=c(mapply(function(x) grep(x,basename(new.envs)),toDo$transfer$whichFutures)) + } + new.envs=new.envs[keep] + + for(ii in 1:length(new.envs)){ + print(paste0(species,': ',' projecting ',tools::file_path_sans_ext(basename(new.envs[ii])))) + env.new=raster::stack(new.envs[ii]) + layerNames=try(read.csv(list.files(dirs$envDir,'.csv', full.names=TRUE),header=FALSE, stringsAsFactors=FALSE)) + if(!class(layerNames)=='try-error'){ + names(env.new)=layerNames[,1] + } else { + names(env.new)=best.var + } + # subset just the best variables used in modeling for speed + env.new=env.new[[best.var]] + + aa=.cropcrop(env.new,env[['trim.domain']]) + env.new=raster::stack(aa[[1]],aa[[2]]) + #cv.stats=vector('list',length(unique(pres$folds))) + #for(k in sort(unique(pres$folds))){ + transfer=tools::file_path_sans_ext(basename(new.envs[ii])) + tmp.proj=projectSDM(dirs=dirs, + mod.out=full.mods, + stats=stats, + projEnv=env.new, + priors=NA, + name=paste0(stats$species[1],'__full'), + best.var=best.var, + transfer=transfer, + fileNameSuffix='_fullDomain', + algorithm=toDo$misc$algorithm, + verbose=1) + #} + } + }) + if(toDo$misc$rmObjs) rm(env.new) + } + + status$prog.transfer=.statusTracker(status$prog.transfer,stats, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.transfer==TRUE) return(status) + + status$time.transfer=round((proc.time() - start.time)[3]- status$time.input.prep - status$time.fit- status$time.project- status$time.eval) + if(verbose>0) print(paste0(species,': ','done transferring')) + if(verbose>2) print(paste0(species,': ',"time: ",status$time.transfer)) + + #------------------------------------------------------------------- + ## 10. Threshold predictions + #------------------------------------------------------------------- + ### 10.a. under fitting conditions + #------------------------------------------------------------------- + #-- this is only for full domains; we mask later + #-- note that the shapefile is for the full domain, which is dumb, but we only use it for quick plots so doesn't really matter. + if(toDo$threshold$thresholding){ + if(verbose>0) print(paste0(species,': ','thresholding ... ')) + prog.threshold=threshold(dirs=dirs, + stats=stats, + pres=pres, + env=env, + saveShapeFile=toSave$shapeFile, + makeThresholdMaps= toDo$threshold$thresholds, + whichModel='full', + modelNameGrep='fullDomain', + algorithm=toDo$misc$algorithm, + makeSparse=toDo$threshold$makeSparseMatrix, + gdalCompress=toDo$misc$gdalCompress) + + status$prog.threshold= .statusTracker(status$prog.threshold,stats, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.threshold==TRUE) return(status) + } + + ### 10.b. under new conditions + #------------------------------------------------------------------- + if(toDo$transfer$otherEnvProjections & toDo$threshold$thresholding) { + if(verbose>0) print(paste0(species,': ','thresholding transferred models ... ')) + prog.threshold.new=try({ + new.envs=list.files(dirs$otherEnvDir,full.names=TRUE,pattern='tif') + if(any(toDo$transfer$whichFutures=='all')){ + keep=seq_along(basename(new.envs)) + } else { + keep=c(mapply(function(x) grep(x,basename(new.envs)),toDo$transfer$whichFutures)) + } + for(ii in keep){ + envName=tools::file_path_sans_ext(basename(new.envs[ii])) + tmp.thresh=threshold(dirs=dirs, + stats=stats, + pres=pres, + env=env, + saveShapeFile=toSave$shapeFileTransfer, + whichModel='full', + transfer=envName, + makeThresholdMaps= toDo$threshold$makeThresholdMaps, + algorithm=toDo$misc$algorithm, + modelNameGrep='fullDomain', + makeSparse=toDo$threshold$makeSparseMatrix, + gdalCompress=toDo$misc$gdalCompress) + } + }) + status$prog.threshold.new=.statusTracker(status$prog.threshold.new, stats,toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + if(!status$prog.threshold.new==TRUE) return(status) + } + + status$time.threshold=round((proc.time() - start.time)[3]- status$time.input.prep - status$time.fit- status$time.project- status$time.eval- status$time.transfer) + if(verbose>0) print(paste0(species,': ','done thresholding')) + if(verbose>2) print(paste0(species,': ',"time: ",status$time.transfer)) + + #------------------------------------------------------------------- + ## 11. Masking predictions + #------------------------------------------------------------------- + #-- the continuous preductions on the fitting data were already masked above to allow evaluation to occupied + neighbors + + # ### 11.a. Mask binary maps to Occupied + neighboring ecoregions + #------------------------------------------------------------------- + if(toDo$trimDomainMask){ + myMasks=raster::stack(list.files(dirs$sp.mask.path,full.names=TRUE))[[1]] + thisMask=raster::reclassify(myMasks,c(.99,2.01,1)) + maskBinaryMaps(thisMask,toDo,dirs,newMapName='occEcoNeigh',species) + if(verbose>0) print(paste0(species,': done masking to occEcoNeigh')) + + ### 11.b. Mask binary maps to Occupied ecoregions + #------------------------------------------------------------------- + myMasks=raster::stack(list.files(dirs$sp.mask.path,full.names=TRUE))[[1]] + thisMask=raster::reclassify(myMasks,c(1.01,Inf,NA)) # use occupied only + maskBinaryMaps(thisMask,toDo,dirs,newMapName='occEco',species) + if(verbose>0) print(paste0(species,': done masking to occEco')) + } + #------------------------------------------------------------------- + ## 12. Plotting + #------------------------------------------------------------------- + prog.plot=compareSdmStatsPlot(stats=stats, + dirs=dirs, + range.poly=NULL, + pres=pres, + env=env, + speciesInfo=FALSE, + legendLoc='panel', + logscaleZ=TRUE, + modelNameGrep= toDo$evaluation$evaluationModel, + toDo=toDo) + + status$prog.plot=.statusTracker(status$prog.plot,prog.plot, toSave=toSave,toDo=toDo, dirs=dirs,stats=stats,status=status) + + if(!status$prog.plot==TRUE) return(status) + status$prog.plot=.statusTracker(status$prog.plot,stats$plotPath) + if(verbose>0) print(paste0(species,': ','done plotting')) + + #------------------------------------------------------------------- + ## 13. Clean up / Record model metadata + #------------------------------------------------------------------- + # later set this up for rmm objects + if(toDo$misc$bienMetadata) md=makeBIENMetadata(stats=stats,dirs=dirs,toDo=toDo) + + if(verbose>2) print(paste0(species,': ',"writing statistics and cleaning")) + status$time.all=stats$runTime=round((proc.time() - start.time)[3]) + status$finished=TRUE + e=exitSDMWorkflow(dirs=dirs,stats=stats,status=status,toSave=toSave, toDo=toDo) + + # clean up tmp files + unlink(rasterOptions(overwrite=TRUE)$tmpdir,recursive=TRUE) + + if(verbose>0) print(paste0(species,': ',"finished model in ", status$time.all)) + if(verbose>2) print(paste0(species,': ','-------------------------')) + + return(status) + }) + return(out) +} + diff --git a/SDM pipeline analytics/.Rhistory b/SDM pipeline analytics/.Rhistory deleted file mode 100644 index e69de29..0000000