diff --git a/Cargo.lock b/Cargo.lock index ad08b83..f90c5aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1061,7 +1061,7 @@ dependencies = [ [[package]] name = "seva" -version = "0.1.0" +version = "0.1.1" dependencies = [ "bytes", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 6bcd768..5533131 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "seva" -version = "0.1.0" +version = "0.1.1" edition = "2021" authors = ["Matt Gathu "] description = "Simple directory http server inspired by Python's http.server" diff --git a/src/errors.rs b/src/errors.rs index abff86a..78d42fe 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,8 +1,10 @@ use core::fmt; -use std::{io, string::FromUtf8Error, time::SystemTimeError}; +use std::{io, num::ParseIntError, string::FromUtf8Error, time::SystemTimeError}; use thiserror::Error; +use crate::http::HeaderName; + pub type Result = std::result::Result; #[derive(Error, Debug)] @@ -26,6 +28,8 @@ pub enum SevaError { TestClient(String), #[error("URI Too Long")] UriTooLong, + #[error("Missing value for header: {0}")] + MissingHeaderValue(HeaderName), } #[derive(Error, Debug)] @@ -36,6 +40,8 @@ pub enum ParsingError { UnknownMethod(String), PestRuleError(String), DateTime(String), + IntError(#[from] ParseIntError), + InvalidRangeHeader(String), } impl fmt::Display for ParsingError { diff --git a/src/http.rs b/src/http.rs index ed3fc21..1b82f01 100644 --- a/src/http.rs +++ b/src/http.rs @@ -23,6 +23,9 @@ pub struct Request<'a> { } impl<'a> Request<'a> { + pub fn is_partial(&self) -> bool { + self.headers.contains_key(&HeaderName::Range) + } pub fn parse(req_str: &str) -> Result { trace!("Request::parse"); let mut res = HttpParser::parse(Rule::request, req_str) @@ -121,6 +124,10 @@ impl ResponseBuilder { Self::new(StatusCode::Ok, BTreeMap::new()) } + pub fn partial() -> ResponseBuilder { + Self::new(StatusCode::PartialContent, BTreeMap::new()) + } + pub fn not_found() -> ResponseBuilder { Self::new(StatusCode::NotFound, BTreeMap::new()) } @@ -273,15 +280,100 @@ header = { header_name ~ ":" ~ whitespace ~ header_value ~ NEWLINE } header_name = { (!(NEWLINE | ":") ~ ANY)+ } header_value = { (!NEWLINE ~ ANY)+ } - +// accept-encoding header parser ws = _{( " " | "\t")*} accept_encoding = { encoding ~ ws ~ ("," ~ ws ~ encoding)* ~ EOI} algo = {(ASCII_ALPHA+ | "identity" | "*")} weight = {ws ~ ";" ~ ws ~ "q=" ~ qvalue} qvalue = { ("0" ~ ("." ~ ASCII_DIGIT{,3}){,1}) | ("1" ~ ("." ~ "0"{,3}){,1}) } encoding = { algo ~ weight*} + +// Range header parser +// +// A range request can specify a single range or a set of ranges within a single representation. +// +// Range = ranges-specifier +// ranges-specifier = range-unit "=" range-set +// range-unit = token +// range-set = 1#range-spec +// range-spec = int-range / suffix-range / other-range +// int-range = first-pos "-" [ last-pos ] +// first-pos = 1*DIGIT +// last-pos = 1*DIGIT +// suffix-range = "-" suffix-length +// suffix-length = 1*DIGIT +// other-range = 1*( %x21-2B / %x2D-7E ) ; 1*(VCHAR excluding comma) +// +bytes_range = { "bytes" ~ ws ~ "=" ~ ws ~ range_sets } +range_sets = _{ range_set ~ ws ~ ("," ~ ws ~ range_set)* ~ EOI } +range_set = _{(int_range | suffix_range)} +int_range = { first_pos ~ "-" ~ last_pos*} +suffix_range = { "-" ~ len} +first_pos = { ASCII_DIGIT+ } +last_pos = { ASCII_DIGIT+ } +len = { ASCII_DIGIT* } "#] -struct HttpParser; +pub struct HttpParser; + +impl HttpParser { + pub fn parse_bytes_range(val: &str, max_len: usize) -> Result> { + let br = HttpParser::parse(Rule::bytes_range, val) + .map_err(|e| ParsingError::PestRuleError(format!("{e:?}")))? + .next() + .unwrap(); + let mut ranges = vec![]; + for pair in br.into_inner() { + match pair.as_rule() { + Rule::int_range => { + let mut inner = pair.into_inner(); + let start = inner + .next() + .unwrap() + .as_str() + .parse() + .map_err(ParsingError::IntError)?; + let end = match inner.next() { + Some(r) => { + r.as_str().parse().map_err(ParsingError::IntError)? + } + None => max_len, + }; + if start > end { + Err(ParsingError::InvalidRangeHeader(val.to_owned()))?; + } + let size = end - start; + ranges.push(BytesRange { start, size }); + } + Rule::suffix_range => { + let mut inner = pair.into_inner(); + let size = inner + .next() + .unwrap() + .as_str() + .parse() + .map_err(ParsingError::IntError)?; + if size >= max_len { + Err(ParsingError::InvalidRangeHeader(val.to_owned()))?; + } + let start = max_len - size; + ranges.push(BytesRange { start, size }); + } + _ => {} + } + } + if ranges.len() > 10 { + return Err(ParsingError::InvalidRangeHeader(val.to_owned()))?; + } + + Ok(ranges) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct BytesRange { + pub start: usize, + pub size: usize, +} macro_rules! status_codes { ( @@ -394,6 +486,9 @@ status_codes! { /// This response is sent on an idle connection (RequestTimeout,408); + /// Indicates that a server cannot serve the requested ranges. + (RangeNotSatisifiable, 416); + /// The user has sent too many requests in a given amount of time ("rate /// limiting"). (TooManyRequests,429); @@ -488,12 +583,18 @@ header_names! { /// Indicates the size of the entity-body. (ContentLength, "content-length"); + /// Indicates where in a full body message a partial message belongs. + (ContentRange, "content-range"); + /// Used to indicate the media type of the resource. (ContentType, "content-type"); /// Contains the date and time at which the message was originated. (Date, "date"); + /// Identifier for a specific version of a resource. + (ETag, "etag"); + /// Specifies the domain name of the server and (optionally) the TCP port /// number on which the server is listening. (Host, "host"); @@ -513,6 +614,9 @@ header_names! { /// Indicates the part of a document that the server should return. (Range, "range"); + /// Contains the absolute or partial address from which a resource has been requested. + (Referer, "referer"); + /// Contains information about the software used by the origin server to /// handle the request. (Server, "server"); @@ -521,6 +625,9 @@ header_names! { /// software. (UserAgent, "user-agent"); + /// Describes the parts of the request message aside from the method and URL that influenced the content of the response it occurs in. + (Vary, "vary"); + /// General HTTP header contains information about possible problems with /// the status of the message. (Warning, "warning"); @@ -567,6 +674,25 @@ mod tests { Ok(()) } + #[test] + fn bytes_range_parser() -> Result<()> { + for val in [ + "bytes=0-499", + "bytes=500-999", + "bytes=-500", + "bytes=9500-", + "bytes=0-0,-1", + "bytes= 0-0, -2", + "bytes= 0-999, 4500-5499, -1000", + "bytes=500-600,601-999", + "bytes=500-700,601-999", + ] { + let range = HttpParser::parse_bytes_range(val, 10000); + assert!(range.is_ok(), "failed to parse: {val}. Reason: {range:?}"); + } + Ok(()) + } + #[test] fn response_body_type_mapping() -> Result<()> { let builder = ResponseBuilder::ok(); diff --git a/src/server.rs b/src/server.rs index 8f3c55a..042c33a 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,7 +1,7 @@ use std::{ collections::HashMap, fs::{metadata, read_dir, File}, - io::{self, Cursor, Read, Write}, + io::{self, Cursor, Read, Seek, Write}, net::{SocketAddr, TcpListener, TcpStream}, path::PathBuf, str::FromStr, @@ -14,13 +14,17 @@ use std::{ use bytes::{BufMut, BytesMut}; use chrono::Local; use clap::crate_version; +use contracts::debug_requires; use handlebars::Handlebars; use tracing::{debug, error, info, trace}; use crate::{ - errors::{IoErrorUtils, Result, SevaError}, + errors::{IoErrorUtils, ParsingError, Result, SevaError}, fs::{DirEntry, EntryType}, - http::{HeaderName, HttpMethod, Request, Response, ResponseBuilder, StatusCode}, + http::{ + HeaderName, HttpMethod, HttpParser, Request, Response, ResponseBuilder, + StatusCode, + }, mime::MimeType, }; @@ -143,6 +147,12 @@ impl RequestHandler { SevaError::UriTooLong => { self.send_error(StatusCode::UriTooLong, None)? } + SevaError::ParsingError(ParsingError::InvalidRangeHeader(hdr)) => { + self.send_error( + StatusCode::RangeNotSatisifiable, + Some(&format!("invalid range: {hdr}")), + )? + } _ => { error!("internal server error: {e}"); self.send_error( @@ -173,7 +183,13 @@ impl RequestHandler { self.send_dir(&req, "/", &self.dir.clone())?; } else if let Some(entry) = self.lookup_path(&req_path)? { match entry.file_type { - EntryType::File => self.send_file(&req, &entry)?, + EntryType::File => { + if req.is_partial() { + self.send_partial(&req, &entry)? + } else { + self.send_file(&req, &entry)? + } + } EntryType::Dir => { if req_path.ends_with('/') { trace!("RequestHandler::_handle send_dir"); @@ -234,6 +250,45 @@ impl RequestHandler { Ok(()) } + #[debug_requires(request.headers.contains_key(&HeaderName::Range))] + fn send_partial(&mut self, request: &Request, entry: &DirEntry) -> Result<()> { + trace!("RequestHandler::send_partial"); + let val = request + .headers + .get(&HeaderName::Range) + .ok_or_else(|| SevaError::MissingHeaderValue(HeaderName::Range))?; + let ranges = HttpParser::parse_bytes_range(val, entry.size as usize) + .map_err(|_| ParsingError::InvalidRangeHeader(val.to_string()))?; + + // we only serve the first range + let range = ranges + .into_iter() + .next() + .ok_or_else(|| ParsingError::InvalidRangeHeader(val.to_string()))?; + let mut file = File::open(&entry.name)?; + file.seek(io::SeekFrom::Start(range.start as u64))?; + let mut buf = vec![0u8; range.size]; + file.read_exact(&mut buf)?; + let response = ResponseBuilder::partial() + .headers(self.get_file_headers(entry)) + .header(HeaderName::ContentLength, &format!("{}", buf.len())) + .header( + HeaderName::ContentRange, + &format!( + "bytes {}-{}/{}", + range.start, + range.start + range.size, + entry.size + ), + ) + .header(HeaderName::Vary, "*") + .body(Cursor::new(buf)) + .build(); + self.send_response(response, request)?; + + Ok(()) + } + fn redirect(&mut self, req: &Request, location: &str) -> Result<()> { let resp = ResponseBuilder::redirect(location).build(); self.send_response(resp, req)?; @@ -337,7 +392,11 @@ impl RequestHandler { 0 } else { trace!("RequestHandler::send_response body io::copy"); - io::copy(&mut response.body, &mut self.stream)? as usize + // TODO: can we do this w/o blocking + self.stream.set_nonblocking(false)?; + let count = io::copy(&mut response.body, &mut self.stream)? as usize; + self.stream.set_nonblocking(true)?; + count }; self.log_response(request, response.status, bytes_sent); @@ -386,6 +445,7 @@ impl RequestHandler { self.send_hdr(HeaderName::Server, server)?; self.send_hdr(HeaderName::Date, Local::now().to_rfc2822())?; self.send_hdr(HeaderName::Connection, "close")?; + self.send_hdr(HeaderName::AcceptRanges, "bytes")?; // Finish self.stream.write_all(b"\r\n")?; @@ -635,6 +695,71 @@ mod tests { Ok(()) } + #[test] + #[cfg(unix)] + fn range_requests_ok() -> Result<()> { + let port = start_server()?; + + let client = reqwest::blocking::Client::new(); + let response = client + .get(format!("http://127.0.0.1:{port}/Cargo.toml")) + .header("Range", "bytes=0-24") + .send() + .map_err(|e| SevaError::TestClient(format!("{}", e)))?; + + assert!(response.status().is_success()); + assert_eq!( + response.headers().get("content-length"), + Some(&HeaderValue::from_str("24").unwrap()) + ); + assert_eq!( + response.headers().get("content-type"), + Some(&HeaderValue::from_str("application/toml").unwrap()) + ); + assert_eq!(response.status().as_u16(), 206); + let expected_body = "[package]\nname = \"seva\"\n"; + let resp_bytes = response + .text() + .map_err(|e| SevaError::TestClient(format!("{}", e)))?; + assert_eq!(resp_bytes, expected_body); + + Ok(()) + } + + #[test] + fn invalid_range_unit() -> Result<()> { + let port = start_server()?; + + let client = reqwest::blocking::Client::new(); + let response = client + .get(format!("http://127.0.0.1:{port}/Cargo.toml")) + .header("Range", "bits=0-500") + .send() + .map_err(|e| SevaError::TestClient(format!("{}", e)))?; + + assert!(response.status().is_client_error()); + assert_eq!(response.status().as_u16(), 416); + + Ok(()) + } + + #[test] + fn empty_bytes_range() -> Result<()> { + let port = start_server()?; + + let client = reqwest::blocking::Client::new(); + let response = client + .get(format!("http://127.0.0.1:{port}/Cargo.toml")) + .header("Range", "bytes= ") + .send() + .map_err(|e| SevaError::TestClient(format!("{}", e)))?; + + assert!(response.status().is_client_error()); + assert_eq!(response.status().as_u16(), 416); + + Ok(()) + } + #[test] fn mime_type_works() -> Result<()> { let port = start_server()?;