Skip to content

Commit

Permalink
Merge pull request #9 from sangshuduo/feat/sangshuduo/random-pairs-wi…
Browse files Browse the repository at this point in the history
…th-s3

feat: support exclude file command line argument
  • Loading branch information
sangshuduo authored Jan 14, 2025
2 parents 609e57b + 536f4fb commit 6effea5
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 10 deletions.
16 changes: 8 additions & 8 deletions random_pairs_of_s3file/README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
## random_pairs_of_s3file Usage:

```shell
Usage: random_pairs_of_s3file [OPTIONS] --num-pairs <NUM> --bucket <BUCKET> --directory <DIR> --url-prefix <PREFIX>
Usage: random_pairs_of_s3file [OPTIONS] --num-pairs <NUM_PAIRS> --bucket <BUCKET> --directory <DIRECTORY> --url-prefix <URL_PREFIX>

Options:
--num-pairs <NUM> Number of pairs to generate
--bucket <BUCKET> Name of the S3 bucket
--directory <DIR> Directory (prefix) in the bucket (e.g. "image/")
--url-prefix <PREFIX> URL prefix for final URLs
-h, --help Print help
-V, --version Print version
--num-pairs <NUM_PAIRS> Number of pairs to generate
--bucket <BUCKET> Name of the S3 bucket
--directory <DIRECTORY> Directory (prefix) in the bucket (e.g. "image/")
--url-prefix <URL_PREFIX> URL prefix to form the final URL (e.g. "https://api.example.com/s3/api/v1/resource?url=s3://")
--exclude-file <EXCLUDE_FILE> File containing keys to exclude
-h, --help Print help
-V, --version Print version
20 changes: 18 additions & 2 deletions random_pairs_of_s3file/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use clap::Parser;
use rand::seq::SliceRandom;
use serde::Serialize;
use std::collections::HashSet;
use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader};

// AWS SDK for Rust (1.x)
use aws_config::{load_defaults, BehaviorVersion};
Expand All @@ -28,6 +31,10 @@ struct Args {
/// URL prefix to form the final URL (e.g. "https://api.example.com/s3/api/v1/resource?url=s3://")
#[arg(long, required = true)]
url_prefix: String,

/// File containing keys to exclude
#[arg(long, required = false)]
exclude_file: Option<String>,
}

#[derive(Serialize)]
Expand All @@ -50,6 +57,14 @@ async fn main() -> Result<(), Box<dyn Error>> {
let directory_prefix = &args.directory;
let url_prefix = &args.url_prefix;

// Read excluded keys from file if provided
let excluded_keys: HashSet<String> = if let Some(exclude_file_path) = args.exclude_file {
let file = File::open(&exclude_file_path)?;
BufReader::new(file).lines().map_while(Result::ok).collect()
} else {
HashSet::new()
};

let shared_config = load_defaults(BehaviorVersion::latest()).await;
let s3_client = Client::new(&shared_config);

Expand Down Expand Up @@ -77,6 +92,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
let all_keys: Vec<String> = objects
.iter()
.filter_map(|obj| obj.key().map(str::to_string))
.filter(|key| !excluded_keys.contains(key))
.collect();

if all_keys.len() < 2 {
Expand All @@ -91,12 +107,12 @@ async fn main() -> Result<(), Box<dyn Error>> {
let mut all_pairs = Vec::new();
for (i, source) in all_keys.iter().enumerate() {
// check if source is empty
if source.is_empty() {
if source.is_empty() || source.ends_with('/') {
continue;
}
for (j, candidate) in all_keys.iter().enumerate() {
// check if candidate is empty
if candidate.is_empty() {
if candidate.is_empty() || candidate.ends_with('/') {
continue;
}
if i != j {
Expand Down

0 comments on commit 6effea5

Please sign in to comment.