Skip to content

Latest commit

 

History

History
204 lines (165 loc) · 7.69 KB

example_extracting_chunks_BirdNET.md

File metadata and controls

204 lines (165 loc) · 7.69 KB

Demo extracting clips based on BirdNET output

Example code for reading BirdNET output and exporting detections in species subfolders

#devtools::install_github('BritishTrustForOrnithology/AcousticTools')
library(AcousticTools)

#load the BirdNET output found in the recording folder into a data frame
df <- AcousticTools::read_birdnet_results(
  folder = "H:/Leiothrix Recordings/ST4037"
)
## Folder contains individual Audacity files
#preview the first few detections
head(df)
##                                                       original_wav start end
## 1 H:/Leiothrix Recordings/ST4037/20240716-140000-ST4037-BTO-XM.wav    15  18
## 2 H:/Leiothrix Recordings/ST4037/20240716-140000-ST4037-BTO-XM.wav    21  24
## 3 H:/Leiothrix Recordings/ST4037/20240716-140000-ST4037-BTO-XM.wav    24  27
## 4 H:/Leiothrix Recordings/ST4037/20240716-140000-ST4037-BTO-XM.wav    30  33
## 5 H:/Leiothrix Recordings/ST4037/20240716-140000-ST4037-BTO-XM.wav    33  36
## 6 H:/Leiothrix Recordings/ST4037/20240716-140000-ST4037-BTO-XM.wav    33  36
##         birdnet_scientific_name birdnet_english_name  score lat lon week
## 1                           Gun                  Gun 0.1106  NA  NA   NA
## 2             Muscicapa striata   Spotted Flycatcher 0.1904  NA  NA   NA
## 3             Muscicapa striata   Spotted Flycatcher 0.6818  NA  NA   NA
## 4             Muscicapa striata   Spotted Flycatcher 0.7349  NA  NA   NA
## 5 Coccothraustes coccothraustes             Hawfinch 0.1544  NA  NA   NA
## 6             Muscicapa striata   Spotted Flycatcher 0.1177  NA  NA   NA
##   overlap sensitivity min_conf species_list model
## 1      NA          NA       NA           NA    NA
## 2      NA          NA       NA           NA    NA
## 3      NA          NA       NA           NA    NA
## 4      NA          NA       NA           NA    NA
## 5      NA          NA       NA           NA    NA
## 6      NA          NA       NA           NA    NA

Currently we have all detections. We might want to do some filtering or sampling. For this simple example I’m just going to limit to high-scoring detections (a score of at least 0.9). More complex approaches, like site- or species-based stratified sampling, could be adopted.

#retain only the high scoring detections (score of 0.9 or more);
#rows with a missing score are dropped as well
df <- df[!is.na(df$score) & df$score >= 0.9, , drop = FALSE]

Assuming file names are in the recognised YYYYMMDD-HHMMSS format, I recommend extracting the date and time as a datetime variable. Then the detection offset (start) can be added to get the start datetime of the detection. First split the filename using stringr::str_split_fixed.

library(stringr)
## Warning: package 'stringr' was built under R version 4.2.3
#break each recording's file name into the fields separated by "-" or "."
bits <- as.data.frame(
  stringr::str_split_fixed(
    string = basename(df$original_wav),
    pattern = "-|\\.",
    n = Inf
  )
)
#name the six resulting fields
names(bits) <- c("date_str", "time_str", "loc", "rec", "mic", "ext")
head(bits)
##   date_str time_str    loc rec mic ext
## 1 20240716   140000 ST4037 BTO  XM wav
## 2 20240716   150000 ST4037 BTO  XM wav
## 3 20240716   160000 ST4037 BTO  XM wav
## 4 20240716   160000 ST4037 BTO  XM wav
## 5 20240716   170000 ST4037 BTO  XM wav
## 6 20240716   201900 ST4037 BTO  XM wav
#append the file name fields as extra columns of the main df
df <- cbind(df, bits)

Now convert this to a datetime with as.POSIXct and add the start time offset for the detection (lubridate is loaded here, although the conversion itself uses base R).

library(lubridate)
## Warning: package 'lubridate' was built under R version 4.2.3

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
#extract the datetime of the original recording.
#tz is pinned to UTC so the file name timestamp is handled as plain wall-clock
#time: with the default (session) time zone a timestamp falling in a
#daylight-saving gap may parse to NA or be shifted, and offsets that cross a
#clock change move by an hour, so results would depend on the machine used
df$recording_start_dt <- as.POSIXct(paste(df$date_str, df$time_str),
                                    format = "%Y%m%d %H%M%S",
                                    tz = "UTC")

#add seconds offset to get detection datetime
df$detection_dt <- df$recording_start_dt + df$start

head(df[, c("original_wav", "start", "recording_start_dt", "detection_dt")])
##                                                          original_wav start
## 78   H:/Leiothrix Recordings/ST4037/20240716-140000-ST4037-BTO-XM.wav   486
## 296  H:/Leiothrix Recordings/ST4037/20240716-150000-ST4037-BTO-XM.wav   240
## 500  H:/Leiothrix Recordings/ST4037/20240716-160000-ST4037-BTO-XM.wav   267
## 540  H:/Leiothrix Recordings/ST4037/20240716-160000-ST4037-BTO-XM.wav   360
## 787  H:/Leiothrix Recordings/ST4037/20240716-170000-ST4037-BTO-XM.wav   447
## 1075 H:/Leiothrix Recordings/ST4037/20240716-201900-ST4037-BTO-XM.wav   132
##       recording_start_dt        detection_dt
## 78   2024-07-16 14:00:00 2024-07-16 14:08:06
## 296  2024-07-16 15:00:00 2024-07-16 15:04:00
## 500  2024-07-16 16:00:00 2024-07-16 16:04:27
## 540  2024-07-16 16:00:00 2024-07-16 16:06:00
## 787  2024-07-16 17:00:00 2024-07-16 17:07:27
## 1075 2024-07-16 20:19:00 2024-07-16 20:21:12

Now we can use the detection datetime to make a unique filename. In this example we will put chunks in species folders.

#build the chunk file name: detection timestamp, then the loc, rec and mic
#fields, all joined by "-". The trailing "__" marks the species as unknown
#at this point
df$newfilename <- paste0(
  paste(format(df$detection_dt, "%Y%m%d-%H%M%S"),
        df$loc,
        df$rec,
        df$mic,
        sep = "-"),
  "-__.",
  df$ext
)

#folder that will receive the exported chunks
path_export <- "C:/exports"

#full destination of each clip: export folder / English species name / file name
df$chunk_fullname <- file.path(
  path_export,
  df$birdnet_english_name,
  df$newfilename
)
head(df$chunk_fullname)
## [1] "C:/exports/Redwing/20240716-140806-ST4037-BTO-XM-__.wav"                  
## [2] "C:/exports/Redwing/20240716-150400-ST4037-BTO-XM-__.wav"                  
## [3] "C:/exports/Redwing/20240716-160427-ST4037-BTO-XM-__.wav"                  
## [4] "C:/exports/Redwing/20240716-160600-ST4037-BTO-XM-__.wav"                  
## [5] "C:/exports/Marsh Tit/20240716-170727-ST4037-BTO-XM-__.wav"                
## [6] "C:/exports/Chestnut-backed Chickadee/20240716-202112-ST4037-BTO-XM-__.wav"

Now we can do the exporting using AcousticTools::extract_chunk. In this example I will export 5-second chunks centred on the 3-second BirdNET detections.

#iterate over chunks - just do the first 10 for the demo
#(use n <- nrow(df) instead to export every detection).
#The count is capped at the rows available: indexing past the last detection
#would pass NA file names and times to extract_chunk. seq_len() is used rather
#than 1:n so the loop is skipped entirely when no detections are left
n <- min(10, nrow(df))
for (i in seq_len(n)) {
  #NOTE(review): confirm whether extract_chunk creates the species subfolder
  #of file_chunk itself; if not, create it before running this loop
  AcousticTools::extract_chunk(file_wav = df$original_wav[i],
                               file_chunk = df$chunk_fullname[i],
                               start = df$start[i],
                               end = df$end[i],
                               chunk_duration = 5,
                               verbose = TRUE)
}

Fully anonymised chunk names

If you want to export fully anonymised files, this can be done as follows. The date-time steps above can be skipped in this case. Remember to export the lookup so you know what L4G4Y06Y3YF2TFF.wav actually was!

library(stringi)
## Warning: package 'stringi' was built under R version 4.2.2
#one anonymised name is needed per row of df
num_files <- nrow(df)
#draw a random 15-character code of capital letters and/or digits for each row
df$randstrings <- stringi::stri_rand_strings(
  n = num_files,
  length = 15,
  pattern = "[A-Z0-9]"
)
#turn the codes into .wav paths directly under the export folder
df$chunk_fullname_anonymised <- file.path(
  path_export,
  paste0(df$randstrings, ".wav")
)
head(df$chunk_fullname_anonymised)
## [1] "C:/exports/5ITCNU7P6WCGX8G.wav" "C:/exports/RFY9ZO9OD85175D.wav"
## [3] "C:/exports/LH23QWOAT4NIQW5.wav" "C:/exports/PG296924EFKNAJO.wav"
## [5] "C:/exports/OIRBXTYH94N5776.wav" "C:/exports/PG18L4TYHMXA55U.wav"
#store df as the lookup so anonymised names can be traced back to detections
save(
  list = "df",
  file = file.path(path_export, "detection_to_anonymised_names_lookup.Rdata")
)