Skip to content

Commit

Permalink
Support for AWS datasets (#6)
Browse files Browse the repository at this point in the history
Co-authored-by: Tim Holy <[email protected]>
Co-authored-by: Dae Woo Kim <[email protected]>
  • Loading branch information
timholy and kdw503 authored Mar 8, 2024
1 parent 23c50cc commit 65e6ab7
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 2 deletions.
5 changes: 5 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ authors = ["Tim Holy <[email protected]>"]
version = "0.2.0"

[deps]
AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
CoordinateTransformations = "150eb455-5306-5404-9cee-2592286d6298"
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
Expand All @@ -15,12 +17,14 @@ IndirectArrays = "9b13fd28-a010-5f03-acff-a1bbcff69959"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
NRRD = "9bb6cfbd-7763-5393-b1b5-1c8e09872146"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"

[weakdeps]
Expand All @@ -30,6 +34,7 @@ Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
AllenBrainMakieExt = "Makie"

[compat]
AWSS3 = "0.10, 0.11"
AxisArrays = "0.4"
CoordinateTransformations = "0.6"
FileIO = "1"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
[![Build Status](https://travis-ci.org/JuliaNeuroscience/AllenBrain.jl.svg?branch=master)](https://travis-ci.org/JuliaNeuroscience/AllenBrain.jl)
[![codecov.io](http://codecov.io/github/JuliaNeuroscience/AllenBrain.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaNeuroscience/AllenBrain.jl?branch=master)

**NOTE**: this was written against Julia 0.6 and only superficially updated, just to get it to build on Julia 1.x.
Anyone wanting to use this package will likely have to fix some bugs.
**NOTE**: this was written against Julia 0.6 and did not have any tests, and has received only minor updates since.
It's likely that there are lurking bugs.

AllenBrain can query the *in situ* and projection databases of the [Allen Brain Atlas](https://portal.brain-map.org/).
It can also generate 3d visualizations colored by brain region using Makie.
Expand Down
6 changes: 6 additions & 0 deletions src/AllenBrain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module AllenBrain

using Statistics
using JSON, LightGraphs, IndirectArrays, AxisArrays, OffsetArrays
using JSON3, StructTypes
using FilePathsBase
using StaticArrays, CoordinateTransformations
using ImageTransformations, ImageCore, Interpolations
using IntervalSets, ProgressMeter, FileIO
Expand All @@ -12,6 +14,8 @@ export #
# basic types
BoundingBox,
buffer,
# AWS data
awsmanifest,
# ontology
ontology,
findvertices,
Expand All @@ -26,6 +30,7 @@ export #
sectionimage,
download_insitu_images,
splice_sectionimages,
download_dir,
# genes
query_insitu,
# visualization
Expand Down Expand Up @@ -62,6 +67,7 @@ inmicrons(x::Real) = x
inmicrons(x) = x/(1μm)

include("types.jl")
include("awsdata.jl")
include("ontology.jl")
include("images.jl")
include("projections.jl")
Expand Down
119 changes: 119 additions & 0 deletions src/awsdata.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
using AWSS3
using AWSS3: AWSConfig

# Default AWS configuration used by the download helpers in this file:
# no credentials are supplied (`creds=nothing`) and requests target `us-west-2`.
# NOTE(review): presumably the Allen bucket permits anonymous reads — confirm.
const allen_config = AWSConfig(; creds=nothing, region="us-west-2")

# The types below mirror the layout of `manifest.json` so that JSON3 can parse
# the file directly into structs.

"""
    AWSDir

One directory record of the manifest. Fields are filled verbatim from the JSON
object of the same shape:

- `relative_path`: path of the directory (relative to the manifest's resource URI, presumably)
- `version`: version string of this directory
- `total_size`: size reported by the manifest (units not stated here; presumably bytes)
- `url`: URL string for the directory
- `view_link`: link string for viewing the directory
"""
struct AWSDir
    relative_path::String
    version::String
    total_size::Int
    url::String
    view_link::String
end

# Compact display: only the path and the size are shown.
function Base.show(io::IO, entry::AWSDir)
    print(io, "AWSDir(\"", entry.relative_path, "\", size=", entry.total_size, ")")
    return nothing
end

"""
    AWSDirs

Wrapper around a `Dict{String,AWSDir}` stored in the `directories` field.

An `AWSDirs` can be indexed by key (`dirs[key]`) and iterated, yielding the
`key => AWSDir` pairs of the underlying dictionary. `length` and `eltype` are
defined so that `collect(dirs)` and comprehensions over `dirs` work.
"""
struct AWSDirs
    directories::Dict{String,AWSDir}
end

# Display as a vector of `key => AWSDir` pairs.
function Base.show(io::IO, dirs::AWSDirs)
    print(io, [name => dir for (name, dir) in dirs.directories])
    return nothing
end

# Key lookup forwards to the wrapped dictionary (throws `KeyError` if absent).
Base.getindex(dirs::AWSDirs, k) = dirs.directories[k]

# Iteration forwards to the wrapped dictionary.
Base.iterate(dirs::AWSDirs) = iterate(dirs.directories)
Base.iterate(dirs::AWSDirs, state) = iterate(dirs.directories, state)

# `Base.IteratorSize` defaults to `HasLength()`, so without `length` any
# `collect(dirs)` or `[x for x in dirs]` would throw a `MethodError`.
Base.length(dirs::AWSDirs) = length(dirs.directories)
Base.eltype(::Type{AWSDirs}) = Pair{String,AWSDir}

"""
    AWSManifest

Top-level object parsed from a release's `manifest.json`.

- `resource_uri`: URI string of the release's storage location
- `version`: version string of the release
- `directory_listing`: dictionary of `AWSDirs`, keyed by string
- `file_listing`: dictionary of untyped (`Any`) entries, keyed by string
"""
struct AWSManifest
    resource_uri::String
    version::String
    directory_listing::Dict{String,AWSDirs}
    file_listing::Dict{String,Any}
end

# Summary display: the version plus the number of entries in each listing.
function Base.show(io::IO, manifest::AWSManifest)
    ndirs = length(manifest.directory_listing)
    nfiles = length(manifest.file_listing)
    print(io, "AWSManifest(version = \"", manifest.version,
          "\", length(directory_listing) = ", ndirs,
          ", length(file_listing) = ", nfiles, ")")
    return nothing
end

# Register each manifest type with StructTypes as a plain `Struct`, which is
# what lets `JSON3.read(json, AWSManifest)` build these types.
function StructTypes.StructType(::Type{AWSDir})
    return StructTypes.Struct()
end

function StructTypes.StructType(::Type{AWSDirs})
    return StructTypes.Struct()
end

function StructTypes.StructType(::Type{AWSManifest})
    return StructTypes.Struct()
end

"""
awsmanifest(version)
Download Allen Brain manifest data
# Positional arguments:
- `version`: version number of Allen Brain Dataset
# Examples
```jldoctest
julia> version = "20231215"
"20231215"
julia> manifest = awsmanifest(version)
AWSManifest(version = "20231215", length(directory_listing) = 18, length(file_listing) = 18)
```
"""
function awsmanifest(version)
url = "https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/releases/$version/manifest.json"
rq = HTTP.request("GET", url)
return JSON3.read(String(rq.body), AWSManifest)
end

"""
    bucket(manifest::AWSManifest)

Return the third `/`-separated component of `manifest.resource_uri`, which for
a URI of the form `s3://name/...` is `name`.

Throws an error if `manifest.resource_uri` does not start with `s3://`.
"""
function bucket(manifest::AWSManifest)
    uri = manifest.resource_uri
    startswith(uri, "s3://") || error("Not an S3 URI: ", uri)
    return split(uri, "/")[3]
end

"""
download_dir(manifest, relative_path, to; config=AWSConfig(; creds=nothing, region="us-west-2"))
Download Allen Brain data from `manifest.resource_uri * relative_path` to `to`.
# Positional arguments:
- `manifest`: Allen Brain manifest data (see [`awsmanifest`](@ref))
- `relative_path`: Relative path of the data you want to download. Full path will be constructed with `manifest.resource_uri * relative_path`.
- `to` : local path (folder and file name) to which the download will be saved
# Keyword arguments:
- `config`: AWS configuration for accessing the service
# Examples
To download data with `feature_matrix_label = "WMB-10Xv2-TH"` which is in `dataset_label="WMB-10Xv2"`
```jldoctest
julia> expression_matrices = manifest.file_listing["WMB-10Xv2"]["expression_matrices"]
Dict{String, Any} with 10 entries:
"WMB-10Xv2-OLF" => Dict{String, Any}("raw"=>Dict{String, Any}("files"=>Dict{String, Any}("h5ad"=>Dict{String, Any}("relative_path"=>"expression_matrices/WMB-10Xv2/2…
...
"WMB-10Xv2-Isocortex-4" => Dict{String, Any}("raw"=>Dict{String, Any}("files"=>Dict{String, Any}("h5ad"=>Dict{String, Any}("relative_path"=>"expression_matrices/WMB-10Xv2/2…
julia> feature_matrix_label = "WMB-10Xv2-TH"
"WMB-10Xv2-TH"
julia> rpath = expression_matrices[feature_matrix_label]["log2"]["files"]["h5ad"]["relative_path"]
# for raw data, use "raw" instead of "log2"
"expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad"
julia> download_base = joinpath(datapath,"AllenBrain") # assumes `datapath` is already defined as a path on your local machine
"/storage1/fs1/holy/Active/username/work/Data/AllenBrain"
julia> local_path = joinpath(download_base, split(rpath,"/")... )
"/storage1/fs1/holy/Active/username/work/Data/AllenBrain/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad"
julia> AllenBrain.download_dir(manifest, rpath, local_path)
Downloading from s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad
to /storage1/fs1/holy/Active/username/work/Data/AllenBrain/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad
```
"""
function download_dir(manifest::AWSManifest, relative_path::String, to::AbstractString; config=allen_config)
from = S3Path(manifest.resource_uri * relative_path; config)
print("Downloading from $(Path(from))\n to $(to)")
pt, _ = splitdir(to)
isdir(pt) || mkdir(Path(pt), recursive=true, exist_ok=true)
sync(from, Path(to))
end

0 comments on commit 65e6ab7

Please sign in to comment.