From 31bb9ab87e39e17a75e35aa7db2cd20a9acf12c1 Mon Sep 17 00:00:00 2001 From: lava Date: Fri, 23 Dec 2011 00:16:08 -0600 Subject: [PATCH] voting and mitm ready version --- Cache.cpp | 376 ++++++++++ Cache.h | 53 ++ CacheEntry.cpp | 66 ++ CacheEntry.h | 64 ++ HTTP.cpp | 436 ++++++++++++ HTTP.h | 68 ++ HTTPRequest.cpp | 147 ++++ HTTPRequest.h | 34 + Makefile | 35 + MyServerSocket.cpp | 94 +++ MyServerSocket.h | 31 + MySocket.cpp | 450 ++++++++++++ MySocket.h | 96 +++ MySocketException.h | 22 + README | 19 + dbg.h | 18 + http_parser.c | 1625 +++++++++++++++++++++++++++++++++++++++++++ http_parser.h | 176 +++++ main.cpp | 263 +++++++ 19 files changed, 4073 insertions(+) create mode 100644 Cache.cpp create mode 100644 Cache.h create mode 100644 CacheEntry.cpp create mode 100644 CacheEntry.h create mode 100644 HTTP.cpp create mode 100644 HTTP.h create mode 100644 HTTPRequest.cpp create mode 100644 HTTPRequest.h create mode 100644 Makefile create mode 100644 MyServerSocket.cpp create mode 100644 MyServerSocket.h create mode 100644 MySocket.cpp create mode 100644 MySocket.h create mode 100644 MySocketException.h create mode 100644 README create mode 100644 dbg.h create mode 100644 http_parser.c create mode 100644 http_parser.h create mode 100644 main.cpp diff --git a/Cache.cpp b/Cache.cpp new file mode 100644 index 0000000..d31ea5c --- /dev/null +++ b/Cache.cpp @@ -0,0 +1,376 @@ +/*======================================================== +** University of Illinois/NCSA +** Open Source License +** +** Copyright (C) 2011,The Board of Trustees of the University of +** Illinois. All rights reserved. 
+** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
+**========================================================== +*/ + +#include "Cache.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "CacheEntry.h" +#include "dbg.h" + +using namespace std; + +static Cache globalCache; +int Cache::num_browsers = 0; + +static string reply404 = "HTTP/1.1 404 Not Found\r\nServer: twproxy\r\nConnection: close\r\nContent-Length: 0\r\n\r\n"; + + +extern int serverPorts[]; + +MySocket *Cache::getReplySocket(string host, bool isSSL) +{ + assert(host.find(':') != string::npos); + assert(host.find(':') < (host.length()-1)); + string portStr = host.substr(host.find(':')+1); + string hostStr = host.substr(0, host.find(':')); + int port; + int ret = sscanf(portStr.c_str(), "%d", &port); + assert((ret == 1) && (port > 0)); + MySocket *replySock = NULL; + try { + //cout << "making connection to " << hostStr << ":" << port << endl; + replySock = new MySocket(hostStr.c_str(), port); + if(isSSL) { + replySock->enableSSLClient(); + } + } catch(char *e) { + cout << e << endl; + } catch(...) 
{ + cout << "could not connect to " << hostStr << ":" << port << endl; + } + return replySock; +} + +//XXX: should check url, method, cookie, possible even port +CacheEntry *Cache::find(string url, string /*request*/) { + map::iterator i = m_store.find(url); + if(i == m_store.end()) + return NULL; + else + return (CacheEntry *)(i->second); +} + +void Cache::addToStore(string url, CacheEntry *ent) { + assert(ent != NULL); + m_store.insert(pair(url, ent)); +} + +int Cache::votingFetchInsertWriteback(string url, string request, int browserId, MySocket *browserSock, string host, bool isSSL, MySocket *replySock) +{ + assert(browserId >= 0); + //find() will take care of all checks, including same url, same method, different + //cookie for the same browser or different browser + CacheEntry *ent = find(url, request); + if(ent == NULL) { + //first request of this url, wait till someone vote and fetch then write + //back to brower, or keep waiting forever + cache_dbg("browser: %d ,%s no exist\n", browserId, url.c_str()); + ent = new CacheEntry(num_browsers, url, request); + ent->setReqState(CACHE_NEW); + cache_dbg("browser: %d ,%s SET to cache_new\n", browserId, url.c_str()); + int ret = ent->updateReqVec(browserId); + + vector v = ent->getReqVec(); + printf("%d %d %d %s\n", v[0], v[1], v[2], url.c_str()); + + + assert(ret == 1); + addToStore(url, ent); + while(ent->getReqState() != CACHE_IN) { + cache_dbg("CACHE_NEW cache before WAIT browser %d %s\n", browserId, url.c_str()); + pthread_cond_wait(&cache_cond, &cache_mutex); + cache_dbg("CACHE_NEW cache AFTER WAIT browser %d %s\n", browserId, url.c_str()); + } + + + v = ent->getReqVec(); + printf("%d waked up\n%d %d %d %s\n", browserId, v[0], v[1], v[2], url.c_str()); + + sendBrowser(browserSock, ent, browserId); + } + else if(ent->getReqState() == CACHE_IN) { + //this request is fetched + cache_dbg("browser: %d ,%s is CACHE_IN\n", browserId, url.c_str()); + int ret = ent->updateReqVec(browserId); + + vector v = 
ent->getReqVec(); + printf("%d %d %d %s\n", v[0], v[1], v[2], url.c_str()); + + //opera is really making two same requests to http://google.com, no difference +// assert(ret == 1); + sendBrowser(browserSock, ent, browserId); + } + else if(ent->getReqState() == CACHE_FETCHING) { + //somebody is fetching the request, wait till done + cache_dbg("browser: %d ,%s is CACHE_FETCHING\n", browserId, url.c_str()); + int ret = ent->updateReqVec(browserId); + + vector v = ent->getReqVec(); + printf("fetching\n%d %d %d %s\n", v[0], v[1], v[2], url.c_str()); + + +// assert(ret == 1); + while(ent->getReqState() != CACHE_IN) { + cache_dbg("CACHE_FETCHING cache before WAIT browser %d %s\n", browserId, url.c_str()); + pthread_cond_wait(&cache_cond, &cache_mutex); + cache_dbg("CACHE_FETCHING cache AFTER WAIT browser %d %s\n", browserId, url.c_str()); + } + + + v = ent->getReqVec(); + printf("%d waked up\n%d %d %d %s\n", browserId, v[0], v[1], v[2], url.c_str()); + + + sendBrowser(browserSock, ent, browserId); + } + else if(ent->getReqState() == CACHE_NEW) { + //vote for some previous request from someone, and FETCH + //it won't be my own old request, find() is going to take of that + int ret = ent->updateReqVec(browserId); + vector v = ent->getReqVec(); + printf("%d %d %d %s\n", v[0], v[1], v[2], url.c_str()); +// assert(ret == 1); + cache_dbg("browser: %d ,%s SET to cache_fetching\n", browserId, url.c_str()); + ent->setReqState(CACHE_FETCHING); + cache_dbg("cache UNlock browser %d %s\n", browserId, url.c_str()); + pthread_mutex_unlock(&cache_mutex); + fetch(ent, host, isSSL, browserId, replySock); + pthread_mutex_lock(&cache_mutex); + cache_dbg("cache lock browser %d %s\n", browserId, url.c_str()); + cache_dbg("browser: %d ,%s SET to cache_in\n", browserId, url.c_str()); + ent->setReqState(CACHE_IN); + sendBrowser(browserSock, ent, browserId); + } + else + assert(false); + return 0; +} + +int Cache::sendBrowser(MySocket *browserSock, CacheEntry *ent, int browserId) { + 
cache_dbg("sendBrowser send to browser %d, response length %d\n", browserId, ent->getResponse().length()); + bool ret = browserSock->write_bytes(ent->getResponse().c_str(), ent->getResponse().length()); + + ent->updateRespVec(browserId); + return 0; +} + +void Cache::getHTTPResponseVote(string host, string request, string url, int serverPort, + MySocket *browserSock, bool isSSL, MySocket *replySock) +{ + int browserId = -1; + pthread_mutex_lock(&cache_mutex); + browserId = serverPort - serverPorts[0]; + cache_dbg("cache lock browser %d %s\n", browserId, url.c_str()); + votingFetchInsertWriteback(url, request, browserId, browserSock, host, isSSL, replySock); + cache_dbg("cache UNlock browser %d %s\n", browserId, url.c_str()); + pthread_mutex_unlock(&cache_mutex); + cache_dbg("cache BROADCAST browser %d %s\n", browserId, url.c_str()); + pthread_cond_broadcast(&cache_cond); +} + +static void dbg_fetch(int ret) { + switch(ret) { + case ENOT_CONNECTED: + cache_dbg("ESOCKET_CONNECTED returned by replySock->read()\n"); + break; + case ECONN_CLOSED: + //cache_dbg("ESOCKET_CLOSED returned by replySock->read()\n"); + break; + case ESOCKET_ERROR: + cache_dbg("ESOCKET_ERROR returned by replySock->read()\n"); + break; + default: + cache_dbg("%d bytes read by replySock->read()\n", ret); + break; + } +} + + +int Cache::fetch(CacheEntry *ent, string host, bool isSSL, int browserId, MySocket *replySock) { + if(replySock == NULL) { + cout << "returning 404" << endl; + ent->appendResponse(reply404); + return -1; + } + cache_dbg("CACHE: SENDING request %s\n", ent->getRequest().c_str()); + if(!replySock->write_bytes(ent->getRequest())) { + cout << "returning 404" << endl; + ent->appendResponse(reply404); + return -1; + } + printf("FETCH CALLED %s, response length: %d\n", ent->getUrl().c_str(), ent->getResponse().length()); + unsigned char buf[1024]; + int num_bytes; + cache_dbg("browser %d: BEFORE FETCHING %s\n", browserId, ent->getUrl().c_str()); + while((num_bytes = 
replySock->read(buf, sizeof(buf))) > 0) { + cache_dbg("CACHE: fetch read %d bytes\n", num_bytes); + ent->appendResponse((const char *)buf, num_bytes); + cache_dbg("ent->resposne length: %d\n", ent->getResponse().length()); + } + dbg_fetch(num_bytes); + + printf("FETCHED %s, response length: %d\n", ent->getUrl().c_str(), ent->getResponse().length()); + cache_dbg("browser %d: done FETCHING %s, response length: %d\n", browserId, ent->getUrl().c_str(), ent->getResponse().length()); + cache_dbg("browser %d: fetched %s\n%s\n", browserId, ent->getUrl().c_str(), ent->getResponse().c_str()); + delete replySock; + return 0; +} + +void Cache::handleResponse(MySocket *browserSock, MySocket *replySock, string request) +{ + if(!replySock->write_bytes(request)) { + // XXX FIXME we should do something other than 404 here + browserSock->write_bytes(reply404); + return; + } + unsigned char buf[1024]; + int num_bytes; + bool ret; + while((num_bytes = replySock->read(buf, sizeof(buf))) > 0) { + ret = browserSock->write_bytes(buf, num_bytes); + if(!ret) { + break; + } + } +} + +void Cache::getHTTPResponseNoVote(string host, string request, string url, int serverPort, + MySocket *browserSock, bool isSSL, MySocket *replySock) +{ + if(replySock == NULL) { + cout << "returning 404" << endl; + browserSock->write_bytes(reply404); + return; + } + handleResponse(browserSock, replySock, request); + + delete replySock; +} + +void Cache::setNumBrowsers(const int num) +{ + num_browsers = (int)num; +} + + + +Cache *cache() +{ + return &globalCache; +} + + +Cache::Cache() +{ + pthread_mutex_init(&cache_mutex, NULL); + pthread_cond_init(&cache_cond, NULL); +} +Cache::~Cache() +{ + pthread_cond_destroy(&cache_cond); + pthread_mutex_destroy(&cache_mutex); +} + + + +/* +bool Cache::copyNetBytes(MySocket *readSock, MySocket *writeSock) +{ + unsigned char buf[1024]; + int ret; + + ret = readSock->read(buf, sizeof(buf)); + if(ret <= 0) + return false; + + return writeSock->write_bytes(buf, ret); +} + +void 
Cache::handleTunnel(MySocket *browserSock, MySocket *replySock) +{ + if(!browserSock->write_bytes(CONNECT_REPLY)) + return; + + int bFd = browserSock->getFd(); + int rFd = replySock->getFd(); + + int ret; + fd_set readSet; + + int maxFd = (bFd > rFd) ? bFd : rFd; + + while(true) { + FD_ZERO(&readSet); + + FD_SET(rFd, &readSet); + FD_SET(bFd, &readSet); + + ret = select(maxFd+1, &readSet, NULL, NULL, NULL); + + if(ret <= 0) + break; + + if(FD_ISSET(rFd, &readSet)) { + if(!copyNetBytes(replySock, browserSock)) { + break; + } + } + + if(FD_ISSET(bFd, &readSet)) { + if(!copyNetBytes(browserSock, replySock)) { + break; + } + } + } +} +*/ diff --git a/Cache.h b/Cache.h new file mode 100644 index 0000000..edd3ce5 --- /dev/null +++ b/Cache.h @@ -0,0 +1,53 @@ +#ifndef _CACHE_H_ +#define _CACHE_H_ + +#include +#include "MySocket.h" + +#include "CacheEntry.h" +#include +#include +class Cache { + public: + Cache(); + virtual ~Cache(); + void getHTTPResponseNoVote(std::string host, std::string request, std::string url, + int serverPort, MySocket *browserSock, bool isSSL, MySocket *replySock); + void getHTTPResponseVote(std::string host, std::string request, std::string url, + int serverPort, MySocket *browserSock, bool isSSL, MySocket *replySock); + MySocket *getReplySocket(std::string host, bool isSSL); + static void setNumBrowsers(const int num); + protected: + void handleResponse(MySocket *browserSock, MySocket *replySock, std::string request); + //void handleTunnel(MySocket *browserSock, MySocket *replySock); + //bool copyNetBytes(MySocket *readSock, MySocket *writeSock); + + + //Need this "string request" parameter, because we have to buffer it, and send it + //out later if voted + //Must grab cache_mutex + int votingFetchInsertWriteback(std::string url, std::string request, int browserId, + MySocket *browserSock, std::string host, bool isSSL, MySocket *replySock); + //Must grab cache_mutex + void addToStore(std::string url, CacheEntry *ent); + //Must grab cache_mutex + 
CacheEntry *find(std::string url, std::string request); + //Must grab cache_mutex + int sendBrowser(MySocket *browserSock, CacheEntry *ent, int browserId); + + int fetch(CacheEntry *ent, std::string host, bool isSSL, int browserId, MySocket *replySock); + + + std::map m_store; + + pthread_mutex_t cache_mutex; + pthread_cond_t cache_cond; + + static int num_browsers; +}; + + + +Cache *cache(); + +#endif diff --git a/CacheEntry.cpp b/CacheEntry.cpp new file mode 100644 index 0000000..43f2ae1 --- /dev/null +++ b/CacheEntry.cpp @@ -0,0 +1,66 @@ +#include "CacheEntry.h" + +CacheEntry::CacheEntry(int num_browsers, std::string _url, std::string _request) +{ + url = _url; + method = METHOD_UNDEFINED; + cache_state = CACHE_UNDEFINED; + m_numBrowsers = num_browsers; + request = _request; + for(int i = 0; i < num_browsers; i++) { + req_vec.push_back(0); + resp_vec.push_back(0); + } +} + +CacheEntry::~CacheEntry() +{ +} + +void CacheEntry::setReqState(REQUEST_CACHE_STATE s) +{ + cache_state = s; +} + +REQUEST_CACHE_STATE CacheEntry::getReqState() +{ + return cache_state; +} + +int CacheEntry::updateReqVec(int browserId) +{ + assert(browserId < m_numBrowsers); + return ++req_vec[browserId]; +} + +int CacheEntry::updateRespVec(int browserId) +{ + assert(browserId < m_numBrowsers); +// assert(resp_vec[browserId] == 0); + return ++resp_vec[browserId]; +} + +std::string CacheEntry::getRequest() +{ + return request; +} + +std::string CacheEntry::getResponse() +{ + return response; +} + +void CacheEntry::appendResponse(std::string part) +{ + response.append(part.c_str()); +} + +void CacheEntry::appendResponse(const char *part, int size) +{ + response.append(part, size); +} + +std::string CacheEntry::getUrl() +{ + return url; +} diff --git a/CacheEntry.h b/CacheEntry.h new file mode 100644 index 0000000..f3f699a --- /dev/null +++ b/CacheEntry.h @@ -0,0 +1,64 @@ +//CacheEntry is ONLY used when in voting mode, non-voting mode does NOT have cache +//because Squid can easily do that for 
us(except MITM need more tricks). +#ifndef __CACHESTORE_H_ +#define __CACHESTORE_H_ + +#include +#include +#include + +#include "assert.h" + +enum HTTP_METHODS { + GET = 0, + POST, + METHOD_UNDEFINED, +}; + +enum REQUEST_CACHE_STATE { + CACHE_NEW = 0, + CACHE_FETCHING, + CACHE_IN, + CACHE_UNDEFINED, +}; + +class CacheEntry { + public: + CacheEntry(int num_browsers, std::string _url, std::string _request); + + virtual ~CacheEntry(); + + void setReqState(REQUEST_CACHE_STATE s); + + REQUEST_CACHE_STATE getReqState(); + + int updateReqVec(int browserId); + + int updateRespVec(int browserId); + + std::vector getReqVec() { return req_vec; }; + + std::string getRequest(); + + std::string getResponse(); + + void appendResponse(std::string part); + + void appendResponse(const char *part, int size); + + std::string getUrl(); + + protected: + int m_numBrowsers; + std::string url; + HTTP_METHODS method; + std::vector req_vec; + std::vector resp_vec; + std::string request; + std::string response; + volatile REQUEST_CACHE_STATE cache_state; +}; + + + +#endif diff --git a/HTTP.cpp b/HTTP.cpp new file mode 100644 index 0000000..79215ec --- /dev/null +++ b/HTTP.cpp @@ -0,0 +1,436 @@ +/*======================================================== +** University of Illinois/NCSA +** Open Source License +** +** Copyright (C) 2011,The Board of Trustees of the University of +** Illinois. All rights reserved. 
+** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
+**========================================================== +*/ + +#include "HTTP.h" + +#include +#include + +#include +#include + +using namespace std; + + +/***************************** HTTP Parser callbacks ************************/ + +int HTTP::message_begin_cb(http_parser *parser) +{ + HTTP *http = (HTTP *) parser->data; + assert(http->getState() == HTTP::INIT); + http->setState(HTTP::HEADER); + return 0; +} + +int HTTP::path_cb(http_parser *parser, const char *at, size_t length) +{ + HTTP *http = (HTTP *) parser->data; + http->m_path.append(at, length); + return 0; +} +int HTTP::query_string_cb(http_parser *parser, const char *at, size_t length) +{ + HTTP *http = (HTTP *) parser->data; + http->m_query.append(at, length); + return 0; +} + +int HTTP::url_cb(http_parser *parser, const char *at, size_t length) +{ + HTTP *http = (HTTP *) parser->data; + http->appendUrl(at, length); + + return 0; +} + +int HTTP::fragment_cb(http_parser */*parser*/, const char */*at*/, size_t /*length*/) +{ + cout << "fragment" << endl; + assert(false); + return 0; +} + +int HTTP::header_field_cb(http_parser *parser, const char *at, size_t length) +{ + HTTP *http = (HTTP *) parser->data; + + if(http->getState() == HTTP::FIELD) { + http->appendHeaderField(at, length); + } else if((http->getState() == HTTP::VALUE) || + (http->getState() == HTTP::HEADER)) { + http->newHeaderField(at, length); + http->setState(HTTP::FIELD); + } else { + assert(false); + } + + return 0; +} + +int HTTP::header_value_cb(http_parser *parser, const char *at, size_t length) +{ + HTTP *http = (HTTP *) parser->data; + if(http->getState() == HTTP::FIELD) { + http->setState(HTTP::VALUE); + } + assert(http->getState() == HTTP::VALUE); + http->appendHeaderValue(at, length); + return 0; +} + +int HTTP::headers_complete_cb(http_parser *parser) +{ + HTTP *http = (HTTP *) parser->data; + http->addHeaderField(); + http->m_headerDone = true; + + if(http->m_httpType == HTTP_RESPONSE) { + char buf[64]; + sprintf(buf, 
"HTTP/%u.%u %u ", parser->http_major, parser->http_minor, parser->status_code); + http->m_statusStr = buf; + if(parser->status_code == 200) { + http->m_statusStr += "OK"; + } else if(parser->status_code == 204) { + http->m_statusStr += "No Content"; + } else if(parser->status_code == 301) { + http->m_statusStr += "Moved Permanently"; + } else if(parser->status_code == 302) { + http->m_statusStr += "Moved Temporarily"; + } else if(parser->status_code == 304) { + http->m_statusStr += "Not Modified"; + } else if(parser->status_code == 403) { + http->m_statusStr += "Forbidden"; + } else if(parser->status_code == 404) { + http->m_statusStr += "Not Found"; + } else if(parser->status_code == 408) { + http->m_statusStr += "Request Timeout"; + } else if(parser->status_code == 500) { + http->m_statusStr += "Internal Server Error"; + } else if(parser->status_code == 503) { + http->m_statusStr += "Service Unavailable"; + } else { + assert(false); + } + + http->m_extraParsedBytes = 1; + return -1; + } + + return 0; +} + +int HTTP::body_cb(http_parser *parser, const char *at, size_t length) +{ + HTTP *http = (HTTP *) parser->data; + http->m_body.append(at, length); + + return 0; +} + +int HTTP::message_complete_cb(http_parser *parser) +{ + HTTP *http = (HTTP *) parser->data; + assert((http->getState() == HTTP::VALUE) || + (http->getState() == HTTP::BODY)); + http->setState(HTTP::DONE); + http->messageComplete(parser->method); + return 0; +} + +/****************************************************************************/ + + + +/*************************** Public Functions *******************************/ + + +HTTP::HTTP(http_parser_type httpType) +{ + m_state = INIT; + http_parser_init(&m_parser, httpType); + m_doneParsing = false; + m_httpType = httpType; + m_headerDone = false; + + m_settings.on_message_begin = message_begin_cb; + m_settings.on_path = path_cb; + m_settings.on_query_string = query_string_cb; + m_settings.on_url = url_cb; + m_settings.on_fragment = fragment_cb; 
+ m_settings.on_header_field = header_field_cb; + m_settings.on_header_value = header_value_cb; + m_settings.on_headers_complete = headers_complete_cb; + m_settings.on_body = body_cb; + m_settings.on_message_complete = message_complete_cb; + + m_parser.data = this; + + m_field = NULL; + m_value = NULL; + m_extraParsedBytes = 0; +} + +HTTP::~HTTP() +{ + if(m_field != NULL) { + delete m_field; + } + + if(m_value != NULL) { + delete m_value; + } + + for(unsigned int idx = 0; idx < m_headers.size(); idx++) { + delete m_headers[idx].first; + delete m_headers[idx].second; + } +} + +int HTTP::addData(const unsigned char *data, int len) +{ + if(m_doneParsing) { + assert(false); + } + int ret = http_parser_execute(&m_parser, &m_settings, (const char *) data, len); + ret += m_extraParsedBytes; + m_extraParsedBytes = 0; + return ret; +} + +string HTTP::getBody() +{ + return m_body; +} + +string HTTP::getUrl() +{ + return m_url; +} + +string HTTP::getHost() +{ + string host = (m_method == HTTP_CONNECT) ? 
m_url : m_host; + if(host.find(':') == string::npos) { + host += ":80"; + } + return host; +} + +bool HTTP::isHeaderDone() +{ + return m_headerDone; +} + +bool HTTP::isDone() +{ + return m_doneParsing; +} + +string HTTP::getReplyHeader() +{ + string reply; + + assert(m_httpType == HTTP_RESPONSE); + assert(m_statusStr.size() > 0); + + reply = m_statusStr + "\r\n"; + + bool foundConn = false; + for(unsigned int idx = 0; idx < m_headers.size(); idx++) { + string field = *(m_headers[idx].first); + string value = *(m_headers[idx].second); + + if(field == "Connection") { + value = "close"; + foundConn = true; + } + + reply += field + string(": ") + value + string("\r\n"); + } + + if(!foundConn) { + reply += "Connection: close\r\n"; + } + + reply += "\r\n"; + + return reply; +} + +string HTTP::getProxyRequest(const char *userAgent) +{ + string reply; + string urlPathQuery; + + assert(m_httpType == HTTP_REQUEST); + + if((m_method == HTTP_GET) || (m_method == HTTP_POST) || (m_method == HTTP_HEAD)) { + if(m_path.size() == 0) { + urlPathQuery = "/"; + } else { + urlPathQuery = m_path; + } + if(m_query.size() > 0) { + urlPathQuery += "?" + m_query; + } + if(m_url.find(urlPathQuery) == string::npos) { + // this is a hack to get around buggy HTML from taobao + assert(m_query.size() > 0); + urlPathQuery = m_path + "??" 
+ m_query; + if(m_url.find(urlPathQuery) == string::npos) { + cout << "url path mismatch " << m_url << endl << urlPathQuery << endl; + } + } + } + + if(m_method == HTTP_GET) { + reply = "GET " + urlPathQuery + " HTTP/1.1\r\n"; + } else if(m_method == HTTP_CONNECT) { + reply = "CONNECT " + m_url + " HTTP/1.1\r\n"; + } else if(m_method == HTTP_POST) { + reply = "POST " + urlPathQuery + " HTTP/1.1\r\n"; + } else if(m_method == HTTP_HEAD) { + reply = "HEAD " + urlPathQuery + " HTTP/1.1\r\n"; + } else { + assert(false); + } + + for(unsigned int idx = 0; idx < m_headers.size(); idx++) { + string field = *(m_headers[idx].first); + string value = *(m_headers[idx].second); + + if((userAgent != NULL) && (field == "User-Agent")) { + value = string(userAgent); + } + + if(field == "Proxy-Connection") { + field = string("Connection"); + value = string("close"); + //value = string("keep-alive"); + } + + if(field != "Keep-Alive") { + reply += field + string(": ") + value + string("\r\n"); + } + } + + reply += string("\r\n"); + if(m_body.size() > 0) { + reply += m_body; + } + + if(m_method == HTTP_HEAD) { + cout << reply; + } + + return reply; +} + + +/****************************************************************************/ + + + +/************************** Private Functions *******************************/ + +HTTP::HttpState HTTP::getState() +{ + return m_state; +} + +void HTTP::setState(HttpState newState) +{ + m_state = newState; +} + +void HTTP::appendUrl(const char *at, size_t len) +{ + m_url.append(at, len); +} + +void HTTP::addHeaderField() +{ + if(m_field != NULL) { + assert(m_value != NULL); + if(*m_field == "Host") { + m_host = *m_value; + } + if(*m_field == "Eoh") { + cout << "got the Eoh header" << endl; + } + m_headers.insert(m_headers.end(), pair(m_field, m_value)); + m_field = NULL; + m_value = NULL; + } else { + assert(m_value == NULL); + } +} + +void HTTP::newHeaderField(const char *at, size_t len) +{ + addHeaderField(); + m_field = new string(at, len); + 
m_value = new string(); +} +void HTTP::appendHeaderField(const char *at, size_t len) +{ + assert(m_field != NULL); + m_field->append(at, len); +} + +void HTTP::appendHeaderValue(const char *at, size_t len) +{ + m_value->append(at, len); +} + +void HTTP::messageComplete(unsigned char method) +{ + if(m_httpType == HTTP_REQUEST) { + assert((method == HTTP_GET) || (method == HTTP_CONNECT) || (method == HTTP_POST) || (method == HTTP_HEAD)); + m_method = method; + } + m_doneParsing = true; +} + +/****************************************************************************/ diff --git a/HTTP.h b/HTTP.h new file mode 100644 index 0000000..aa7a35a --- /dev/null +++ b/HTTP.h @@ -0,0 +1,68 @@ +#ifndef _HTTP_H_ +#define _HTTP_H_ + +#include "http_parser.h" + +#include +#include +#include + +class HTTP { + public: + typedef enum {INIT, HEADER, FIELD, VALUE, BODY, DONE} HttpState; + + HTTP(http_parser_type httpType = HTTP_REQUEST); + ~HTTP(); + + int addData(const unsigned char *data, int len); + bool isDone(); + bool isHeaderDone(); + std::string getProxyRequest(const char *userAgent = NULL); + std::string getReplyHeader(); + std::string getHost(); + std::string getUrl(); + bool isConnect() {return m_method == HTTP_CONNECT;} + std::string getBody(); + + private: + static int message_begin_cb(http_parser *parser); + static int path_cb(http_parser *parser, const char *at, size_t length); + static int query_string_cb(http_parser *parser, const char *at, size_t length); + static int url_cb(http_parser *parser, const char *at, size_t length); + static int fragment_cb(http_parser *parser, const char *at, size_t length); + static int header_field_cb(http_parser *parser, const char *at, size_t length); + static int header_value_cb(http_parser *parser, const char *at, size_t length); + static int headers_complete_cb(http_parser *parser); + static int body_cb(http_parser *parser, const char *at, size_t length); + static int message_complete_cb(http_parser *parser); + + HttpState 
getState(); + void setState(HttpState newState); + void appendUrl(const char *at, size_t len); + void newHeaderField(const char *at, size_t len); + void appendHeaderField(const char *at, size_t len); + void appendHeaderValue(const char *at, size_t len); + void addHeaderField(); + void messageComplete(unsigned char method); + + http_parser_settings m_settings; + http_parser m_parser; + HttpState m_state; + bool m_doneParsing; + bool m_headerDone; + + std::string m_url; + std::string m_path; + std::string m_query; + std::string m_host; + std::string *m_field; + std::string *m_value; + std::vector< std::pair< std::string *, std::string *> > m_headers; + std::string m_body; + std::string m_statusStr; + unsigned char m_method; + http_parser_type m_httpType; + int m_extraParsedBytes; +}; + +#endif diff --git a/HTTPRequest.cpp b/HTTPRequest.cpp new file mode 100644 index 0000000..b1a9013 --- /dev/null +++ b/HTTPRequest.cpp @@ -0,0 +1,147 @@ +/*======================================================== +** University of Illinois/NCSA +** Open Source License +** +** Copyright (C) 2011,The Board of Trustees of the University of +** Illinois. All rights reserved. 
+** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
+**========================================================== +*/ + + +#include "HTTPRequest.h" + +#include +#include + +#include +#include + +#include "dbg.h" + +using namespace std; + +#define CONNECT_REPLY "HTTP/1.1 200 Connection Established\r\n\r\n" + +HTTPRequest::HTTPRequest(MySocket *sock, int serverPort) +{ + m_sock = sock; + m_http = new HTTP(); + m_serverPort = serverPort; + m_totalBytesRead = 0; + m_totalBytesWritten = 0; +} + +HTTPRequest::~HTTPRequest() +{ + delete m_http; +} + +void HTTPRequest::printDebugInfo() +{ + cerr << " isDone = " << m_http->isDone() << endl; + cerr << " bytesRead = " << m_totalBytesRead << endl; + cerr << " bytesWritte = " << m_totalBytesWritten << endl; + cerr << " url = " << m_http->getUrl() << endl; +} + +bool HTTPRequest::readRequest() +{ + assert(!m_http->isDone()); + unsigned char buf[1024]; + + int num_bytes; + while(!m_http->isDone()) { + num_bytes = m_sock->read(buf, sizeof(buf)); + if(num_bytes > 0) { + onRead(buf, (unsigned int) num_bytes); + } else { + cerr << "socket error" << endl; + return false; + } + } +// httpreq_dbg("req: %s\n", getUrl().c_str()); +// httpreq_dbg("req: %s\n", m_http->getProxyRequest().c_str()); + + return true; +} + + +void HTTPRequest::onRead(const unsigned char *buffer, unsigned int len) +{ + m_totalBytesRead += len; + + unsigned int bytesRead = 0; + assert(len > 0); + + while(bytesRead < len) { + assert(!m_http->isDone()); + int ret = m_http->addData(buffer + bytesRead, len - bytesRead); + assert(ret > 0); + bytesRead += ret; + + // This is a workaround for a parsing bug that sometimes + // crops up with connect commands. 
The parser will think + // it is done before it reads the last newline of some + // properly formatted connect requests + if(m_http->isDone() && (bytesRead < len)) { + if(m_http->isConnect() && ((len-bytesRead) == 1) && (buffer[bytesRead] == '\n')) { + break; + } else { + assert(false); + } + } + } +} + +string HTTPRequest::getHost() +{ + return m_http->getHost(); +} +string HTTPRequest::getRequest() +{ + return m_http->getProxyRequest(); +} +string HTTPRequest::getUrl() +{ + return m_http->getUrl(); +} + +bool HTTPRequest::isConnect() +{ + return m_http->isConnect(); +} diff --git a/HTTPRequest.h b/HTTPRequest.h new file mode 100644 index 0000000..182cda9 --- /dev/null +++ b/HTTPRequest.h @@ -0,0 +1,34 @@ +#ifndef HTTP_REQUEST_H_ +#define HTTP_REQUEST_H_ + +#include "MySocket.h" +#include "http_parser.h" +#include "HTTP.h" + +#include + +class HTTPRequest { + public: + HTTPRequest(MySocket *sock, int serverPort); + ~HTTPRequest(); + + bool readRequest(); + + std::string getHost(); + std::string getRequest(); + std::string getUrl(); + bool isConnect(); + + void printDebugInfo(); + + protected: + void onRead(const unsigned char *buffer, unsigned int len); + + MySocket *m_sock; + HTTP *m_http; + int m_serverPort; + unsigned long m_totalBytesRead; + unsigned long m_totalBytesWritten; +}; + +#endif diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ebf59c8 --- /dev/null +++ b/Makefile @@ -0,0 +1,35 @@ +EXE = twproxy +all: $(EXE) + +CC = g++ +CFLAGS = -g -W -Wall -Wno-unused +LDFLAGS = -lpthread -lssl + +OBJS = main.o MyServerSocket.o MySocket.o HTTPRequest.o http_parser.o HTTP.o Cache.o CacheEntry.o + +-include $(OBJS:.o=.d) + +twproxy: $(OBJS) + $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $(OBJS) + +serv: serv.o + $(CC) -o $@ $(CFLAGS) $(LDFLAGS) serv.o + +%.d: %.c + @set -e; $(CC) -MM $(CFLAGS) $< \ + | sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' > $@; + @[ -s $@ ] || rm -f $@ + +%.d: %.cpp + @set -e; $(CC) -MM $(CFLAGS) $< \ + | sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' > 
$@; + @[ -s $@ ] || rm -f $@ + +%.o: %.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ + +clean: + rm -f $(EXE) *.o *~ core.* *.d diff --git a/MyServerSocket.cpp b/MyServerSocket.cpp new file mode 100644 index 0000000..58dcc24 --- /dev/null +++ b/MyServerSocket.cpp @@ -0,0 +1,94 @@ +/*======================================================== +** University of Illinois/NCSA +** Open Source License +** +** Copyright (C) 2011,The Board of Trustees of the University of +** Illinois. All rights reserved. +** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. 
+** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. +**========================================================== +*/ + +#include "MyServerSocket.h" +#include +#include +#include +#include +#include +#include +#include +#include + +MyServerSocket::MyServerSocket(int port) +{ + struct sockaddr_in server; + int one = 1; + + // set up the server socket + serverFd = socket(AF_INET,SOCK_STREAM,0); + + server.sin_family = AF_INET; + server.sin_addr.s_addr = INADDR_ANY; + server.sin_port = htons((short) port); + + if (setsockopt(serverFd,SOL_SOCKET,SO_REUSEADDR,&one,sizeof(int)) == -1) { + throw MySocketException("error with set socket opts"); + } + + if( bind(serverFd,(struct sockaddr *) &server, sizeof(server)) ==-1){ + char str[1024]; + sprintf(str,"could not bind to port %d",port); + throw MySocketException(str); + } + + //set up a listen queue + listen(serverFd, 10); +} + +MySocket *MyServerSocket::accept() +{ + //check that the sockFd is valid + + struct sockaddr_in client; + socklen_t len = sizeof(client); + int clientFd = ::accept(serverFd, (struct sockaddr *) &client, &len); + + if(clientFd<0) { + throw MySocketException("Accept function call failed"); + } + + return new MySocket(clientFd); +} diff --git a/MyServerSocket.h b/MyServerSocket.h new file mode 100644 index 0000000..df7c81b --- /dev/null +++ b/MyServerSocket.h @@ -0,0 +1,31 @@ +#ifndef MYSERVERSOCKET_H +#define MYSERVERSOCKET_H + +#include "MySocket.h" +#include "MySocketException.h" + +class MyServerSocket { + public: + /** + * creates a new server socket and 
binds it to the port specified. + * if it cannot bind, it will throw a socket exception. + * + * @param port the port to bind to + */ + MyServerSocket(int port); + MyServerSocket() { serverFd = -1; } + + /** + * this function will accept incoming requests to connect and + * return the resulting socket + */ + MySocket *accept(); + + int getFd() { return serverFd; } + protected: + int serverFd; + +}; + + +#endif diff --git a/MySocket.cpp b/MySocket.cpp new file mode 100644 index 0000000..bed99f7 --- /dev/null +++ b/MySocket.cpp @@ -0,0 +1,450 @@ +/*======================================================== +** University of Illinois/NCSA +** Open Source License +** +** Copyright (C) 2011,The Board of Trustees of the University of +** Illinois. All rights reserved. +** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. 
+** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. +**========================================================== +*/ + +#include "MySocket.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include "dbg.h" + +using namespace std; + +#define CHK_SSL(err) if ((err)==-1) { ERR_print_errors_fp(stderr); exit(2); } +#define CHK_NULL(x) if ((x)==NULL) exit (1) + +#define HOME "./" +#define CERTF HOME "cacert.pem" +#define KEYF HOME "privkey.pem" + + +MySocket::MySocket(const char *inetAddr, int port) +{ + struct sockaddr_in server; + struct addrinfo hints; + struct addrinfo *res; + + isSSL = false; + ctx = NULL; + ssl = NULL; + + // set up the new socket (TCP/IP) + sockFd = socket(AF_INET,SOCK_STREAM,0); + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + int ret = getaddrinfo(inetAddr, NULL, &hints, &res); + if(ret != 0) { + string str; + str = string("Could not get host ") + string(inetAddr); + throw MySocketException(str.c_str()); + } + + server.sin_addr = ((struct sockaddr_in *) (res->ai_addr))->sin_addr; + server.sin_port = htons((short) port); + server.sin_family = AF_INET; + freeaddrinfo(res); + + // conenct to the server + if( connect(sockFd, (struct sockaddr *) &server, + 
sizeof(server)) == -1 ) { + throw MySocketException("Did not connect to the server"); + } + +} + +MySocket::MySocket(int socketFileDesc) +{ + sockFd = socketFileDesc; + isSSL = false; + ctx = NULL; + ssl = NULL; +} + +MySocket::~MySocket(void) +{ + close(); +} + + +EVP_PKEY * MySocket::readPublicKey(const char *certfile) +{ + FILE *fp = fopen (certfile, "r"); + X509 *x509; + EVP_PKEY *pkey; + if (!fp) + return NULL; + x509 = PEM_read_X509(fp, NULL, 0, NULL); + if (x509 == NULL) { + ERR_print_errors_fp (stderr); + return NULL; + } + fclose (fp); + pkey=X509_extract_key(x509); + X509_free(x509); + if (pkey == NULL) + ERR_print_errors_fp (stderr); + return pkey; +} + +EVP_PKEY *MySocket::readPrivateKey(const char *keyfile) +{ + FILE *fp = fopen(keyfile, "r"); + EVP_PKEY *pkey; + if (!fp) + return NULL; + pkey = PEM_read_PrivateKey(fp, NULL, 0, NULL); + fclose (fp); + if (pkey == NULL) + ERR_print_errors_fp (stderr); + return pkey; +} + +X509 *MySocket::readX509(const char *certfile) +{ + FILE *fp = fopen(certfile, "r"); + if(!fp) + return NULL; + //X509 *PEM_read_X509(FILE *fp, X509 **x, pem_password_cb *cb, void *u); + X509 *cert = PEM_read_X509(fp, NULL, NULL, NULL); CHK_NULL(cert); + fclose(fp); + return cert; +} + +X509 *MySocket::makeAndInitCert() +{ + X509 *new_cert = X509_new(); + X509_set_version(new_cert, 2); + long serialNumber = rand() % RAND_MAX; + ASN1_INTEGER_set(X509_get_serialNumber(new_cert), serialNumber); + X509_gmtime_adj(X509_get_notBefore(new_cert), 0); + X509_gmtime_adj(X509_get_notAfter(new_cert), (long)60*60*24*365); + return new_cert; +} + +void MySocket::initNewName(X509_NAME *new_name, X509_NAME *server_cert_subj_name) +{ + //now setup "CN" + int serv_cert_subjname_ent_num = X509_NAME_entry_count(server_cert_subj_name); + mitm_dbg("subject name entry number: %d\n", serv_cert_subjname_ent_num); + unsigned char *ent_data_str = NULL; + char *ent_obj_str = NULL; + X509_NAME_ENTRY *e = NULL; + ASN1_STRING *asn1_string = NULL; + ASN1_OBJECT 
*asn1_obj = NULL; + int n = -1; + for(int i; i < serv_cert_subjname_ent_num; i++) { + e = X509_NAME_get_entry(server_cert_subj_name, i); + asn1_string = X509_NAME_ENTRY_get_data(e); + asn1_obj = X509_NAME_ENTRY_get_object(e); + ASN1_STRING_to_UTF8(&ent_data_str, asn1_string); + n = OBJ_obj2nid(asn1_obj); + ent_obj_str = (char *)OBJ_nid2ln(n); + + mitm_dbg("name entry %d: %s %s\n", i, ent_obj_str, ent_data_str); + //XXX: ent_data_str and ent_obj_str are pointers to inside fields, should not free them + if(strncmp(ent_obj_str, "commonName", 10) == 0) { + mitm_dbg("setting CN to %s\n", ent_data_str); + if(!X509_NAME_add_entry_by_txt(new_name, "CN", MBSTRING_ASC, ent_data_str, -1, -1, 0)) { + ERR_print_errors_fp(stderr); + exit(7); + } + } + } + if(!X509_NAME_add_entry_by_txt(new_name, "C", MBSTRING_ASC, (const unsigned char *)"US", -1, -1, 0)) { + ERR_print_errors_fp(stderr); + exit(8); + } +} + +//commented some debug codes, keep them for now +X509 *MySocket::generateFakeCert(MySocket *clientSock) +{ + //do NOT free server_cert or these names, because enableSSLClient() is going to free it + + //get the certificate from the proxy <--> remotesite connection + X509 *server_cert = SSL_get_peer_certificate (clientSock->ssl); CHK_NULL(server_cert); + mitm_dbg("Original server certificate:\n"); + //also, do NOT free these two names + X509_NAME *server_cert_subj_name = X509_get_subject_name(server_cert); CHK_NULL(server_cert_subj_name); + X509_NAME *server_cert_issuer_name = X509_get_issuer_name(server_cert); CHK_NULL(server_cert_issuer_name); +/* + char * server_cert_subj_name_str = X509_NAME_oneline(server_cert_subj_name, 0, 0); + mitm_dbg("subject name: %s\n", server_cert_subj_name_str); + char *server_cert_issuer_name_str = X509_NAME_oneline(server_cert_issuer_name, 0, 0); + mitm_dbg("issuer name: %s\n", server_cert_issuer_name_str); + mitm_dbg("subject name entry number: %d\n", serv_cert_subjname_ent_num); + mitm_dbg("issuer name entry number: %d\n", 
X509_NAME_entry_count(server_cert_issuer_name)); +*/ + X509 *new_cert = makeAndInitCert(); + X509_NAME *new_name = X509_get_subject_name(new_cert); + initNewName(new_name, server_cert_subj_name); +/* + char *newname_subjname = X509_NAME_oneline(new_name, 0, 0); + mitm_dbg("new_name's subject name: %s\n", newname_subjname); +*/ + X509 *myCA = readX509(CERTF); CHK_NULL(myCA); + mitm_dbg("FAKE CA cert:\n"); + X509_NAME *caName = X509_get_subject_name(myCA); CHK_NULL(caName); +/* + char *caName_str = X509_NAME_oneline(caName, 0, 0); + mitm_dbg("FAKE CA name: %s\n", caName_str); + + X509_NAME *caIssuerName = X509_get_issuer_name(myCA); CHK_NULL(caIssuerName); + char *caIssuerName_str = X509_NAME_oneline(caIssuerName, 0, 0); + mitm_dbg("FAKE CA issuer name: %s\n", caIssuerName_str); +*/ + //set issuer name to myCA's subject name + X509_set_issuer_name(new_cert, caName); + //do NOT free these two keys, they are used by new_cert + EVP_PKEY *privKey = readPrivateKey(KEYF); CHK_NULL(privKey); + EVP_PKEY *pubKey = readPublicKey(CERTF); CHK_NULL(pubKey); + X509_set_pubkey(new_cert, pubKey); + //sign it + if(!X509_sign(new_cert, privKey, EVP_md5())) + CHK_NULL(NULL); + + return new_cert; +} + +void MySocket::close(void) +{ + if(sockFd<0) return; + + ::close(sockFd); + + sockFd = -1; + + isSSL = false; + + if(ssl != NULL) + SSL_free(ssl); + + if(ctx != NULL) + SSL_CTX_free(ctx); + + ssl = NULL; + ctx = NULL; +} + +void MySocket::enableSSLServer(MySocket *clientSock) +{ + if(sockFd < 0) return; + + ctx = SSL_CTX_new (SSLv23_server_method()); + if (!ctx) { + ERR_print_errors_fp(stderr); + exit(2); + } + + if (SSL_CTX_use_certificate_chain_file(ctx, CERTF) <= 0) { + ERR_print_errors_fp(stderr); + exit(3); + } + if (SSL_CTX_use_PrivateKey_file(ctx, KEYF, SSL_FILETYPE_PEM) <= 0) { + ERR_print_errors_fp(stderr); + exit(4); + } + + if (!SSL_CTX_check_private_key(ctx)) { + fprintf(stderr,"Private key does not match the certificate public key\n"); + exit(5); + } + + X509 *new_cert = 
generateFakeCert(clientSock); CHK_NULL(new_cert); + if(SSL_CTX_use_certificate(ctx, new_cert) != 1) { + ERR_print_errors_fp(stderr); + exit(6); + } + + ssl = SSL_new (ctx); CHK_NULL(ssl); + SSL_set_fd (ssl, sockFd); + int err = SSL_accept (ssl); CHK_SSL(err); + mitm_dbg("SSL connection using %s\n", SSL_get_cipher (ssl)); + isSSL = true; +} + +void MySocket::enableSSLClient(void) +{ + if(sockFd < 0) return; + + ctx = SSL_CTX_new (SSLv23_client_method()); + if (!ctx) { + ERR_print_errors_fp(stderr); + exit(2); + } + + ssl = SSL_new (ctx); CHK_NULL(ssl); + SSL_set_fd (ssl, sockFd); + int err = SSL_connect (ssl); CHK_SSL(err); + + mitm_dbg("SSL connection using %s\n", SSL_get_cipher (ssl)); + + X509 *server_cert = SSL_get_peer_certificate (ssl); CHK_NULL(server_cert); + mitm_dbg("Server certificate:\n"); + + char *str = X509_NAME_oneline (X509_get_subject_name (server_cert),0,0); + CHK_NULL(str); + mitm_dbg("\t subject: %s\n", str); + OPENSSL_free (str); + + str = X509_NAME_oneline (X509_get_issuer_name (server_cert),0,0); + CHK_NULL(str); + mitm_dbg("\t issuer: %s\n", str); + OPENSSL_free (str); + X509_free (server_cert); + isSSL = true; +} + +int MySocket::write(const void *buffer, int len) +{ + if(sockFd<0) return ENOT_CONNECTED; + + int ret; + + if(isSSL) { + ret = SSL_write(ssl, buffer, len); + } else { + ret = ::write(sockFd, buffer, len); + } + + if(ret != len) return ESOCKET_ERROR; + + return ret; +} + +bool MySocket::write_bytes(string buffer) +{ + return write_bytes(buffer.c_str(), buffer.size()); +} + +bool MySocket::write_bytes(const void *buffer, int len) +{ + const unsigned char *buf = (const unsigned char *) buffer; + int bytesWritten = 0; + + while(len > 0) { + bytesWritten = this->write(buf, len); + if(bytesWritten <= 0) { + return false; + } + buf += bytesWritten; + len -= bytesWritten; + } + + return true; + +} + +int MySocket::read(void *buffer, int len) +{ + if(sockFd<0) return ENOT_CONNECTED; + + int ret; + + if(isSSL) { + ret = SSL_read(ssl, 
buffer, len); + } else { + ret = ::read(sockFd, buffer, len); + } + + if(ret == 0) return ECONN_CLOSED; + if(ret < 0) return ESOCKET_ERROR; + + return ret; +} + + +/* +//we should not use default ctor anymore +MySocket::MySocket(void) +{ + sockFd = -1; + isSSL = false; + ctx = NULL; + ssl = NULL; +} +*/ +void MySocket::__enableSSLServer(void) +{ + if(sockFd < 0) return; + + ctx = SSL_CTX_new (SSLv23_server_method()); + if (!ctx) { + ERR_print_errors_fp(stderr); + exit(2); + } + + if (SSL_CTX_use_certificate_chain_file(ctx, CERTF) <= 0) { + ERR_print_errors_fp(stderr); + exit(3); + } + if (SSL_CTX_use_PrivateKey_file(ctx, KEYF, SSL_FILETYPE_PEM) <= 0) { + ERR_print_errors_fp(stderr); + exit(4); + } + + if (!SSL_CTX_check_private_key(ctx)) { + fprintf(stderr,"Private key does not match the certificate public key\n"); + exit(5); + } + + ssl = SSL_new (ctx); CHK_NULL(ssl); + SSL_set_fd (ssl, sockFd); + int err = SSL_accept (ssl); CHK_SSL(err); + + printf ("SSL connection using %s\n", SSL_get_cipher (ssl)); + isSSL = true; +} diff --git a/MySocket.h b/MySocket.h new file mode 100644 index 0000000..6b59383 --- /dev/null +++ b/MySocket.h @@ -0,0 +1,96 @@ +#ifndef MYSOCKET_H +#define MYSOCKET_H + +#define ENOT_CONNECTED -1 +#define EBROKEN_PIPE -2 +#define ECONN_CLOSED -3 +#define ESOCKET_ERROR -4 + +#include "MySocketException.h" + +#include +#include + +class MySocket { + public: + /* + * this is the constructor. It accepts a string representation of + * and ip address ("192.168.0.1") or domain name ("www.cs.uiuc.edu") + * and connects. Will throw an HostNotFound exception if the attepted + * connection fails. MySocket uses the TCP protocol. 
+ * + * @param inetAddr either ip address, or the domain name + * @param port the port to connect to + */ + MySocket(const char *inetAddr, int port); + + /* + * this constructor will generally not be used except for by ServerSockets + */ + MySocket(int socketFileDesc); + + /* + * default constructor, makes sure the state is properly specified + */ + //hx: I don't think we should give default ctor anymore +// MySocket(void); + ~MySocket(void); + + /* + * reads the open socket. See the read system call + * + * @param buffer buffer of length len, where the data will be stored + * @param len the length of the buffer + * + * @return if there is no error, the number of bytes read in. + * ECONN_CLOSED - connection was closed + * EBROKEN_PIPE - broken pipe + * ENOT_CONNECTED - a connection was never established + */ + int read(void *buffer, int len); + + /* + * writes to the open socket, see the write system call. + * + * @param buffer the buffer where the data is stored + * @param len the length of the buffer + * + * @return if there is no error, the number of bytes wrote. + * ECONN_CLOSED - connection was closed + * EBROKEN_PIPE - broken pipe + * ENOT_CONNECTED - a connection was never established + */ + int write(const void *buffer, int len); + + bool write_bytes(std::string buffer); + bool write_bytes(const void *buffer, int len); + void __enableSSLServer(void); + void enableSSLServer(MySocket *); + void enableSSLClient(void); + + /* + * a helper function so select can be used + */ + int getFd(void) { return sockFd; } + + void close(void); + + protected: + //this is the function which generate a fake certificate, based on + //the proxy <--> remotesite connection. 
+ X509 *generateFakeCert(MySocket *clentSock); + //these are helper functions to make fake certificate + EVP_PKEY *readPublicKey(const char *certfile); + EVP_PKEY *readPrivateKey(const char *keyfile); + X509 *readX509(const char *certfile); + X509 *makeAndInitCert(); + void initNewName(X509_NAME *new_name, X509_NAME *server_cert_subj_name); + + int sockFd; + void brokenPipe(int sigNo); + bool isSSL; + SSL_CTX *ctx; + SSL *ssl; +}; + +#endif diff --git a/MySocketException.h b/MySocketException.h new file mode 100644 index 0000000..981555f --- /dev/null +++ b/MySocketException.h @@ -0,0 +1,22 @@ +#ifndef MYSOCKETEXCEPTION_H +#define MYSOCKETEXCEPTION_H + +#define MSG_SIZE 100 + +#include + +class MySocketException { + public: + MySocketException(const char *message) { + strncpy(msg,message,MSG_SIZE-1); + } + + const char *toString() { + return msg; + } + + protected: + char msg[MSG_SIZE]; +}; + +#endif diff --git a/README b/README new file mode 100644 index 0000000..173e0d9 --- /dev/null +++ b/README @@ -0,0 +1,19 @@ +This is a very basic HTTP proxy. Currently it will create a new +thread for each new client connection and use blocking I/O calls to +get the request from the browser and then get a reply from the server. + +Normal proxy usage: ./twproxy +Voting proxy usage: ./twproxy -v + +To use the SSL/TLS MITM functionality you will need to generate a +private key and a self-signed certificate. To generate a key you can +use the following command: + +openssl genrsa -out privkey.pem 2048 + +and for a self-signed certificate using that private key: + +openssl req -new -x509 -key privkey.pem -out cacert.pem -days 1095 + +For more information about certificates and key, please refer to the +openssl documentation, which is where these examples came from. 
diff --git a/dbg.h b/dbg.h new file mode 100644 index 0000000..85d10e7 --- /dev/null +++ b/dbg.h @@ -0,0 +1,18 @@ +#ifndef _DBG_H_ +#define _DBG_H_ + +#include + +#define cache_dbg(...); + +#define httpreq_dbg(...); + +#define mitm_dbg(...); + +//#define cache_dbg(...) do{printf(__VA_ARGS__);}while(0); + +#define httpreq_dbg(...) do{printf(__VA_ARGS__);}while(0); + +//#define mitm_dbg(...) do{printf(__VA_ARGS__);}while(0); + +#endif diff --git a/http_parser.c b/http_parser.c new file mode 100644 index 0000000..d788ad6 --- /dev/null +++ b/http_parser.c @@ -0,0 +1,1625 @@ +/* Copyright 2009,2010 Ryan Dahl + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include +#include + + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + + +#define CALLBACK2(FOR) \ +do { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser)) return (p - data); \ + } \ +} while (0) + + +#define MARK(FOR) \ +do { \ + FOR##_mark = p; \ +} while (0) + +#define CALLBACK_NOCLEAR(FOR) \ +do { \ + if (FOR##_mark) { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser, \ + FOR##_mark, \ + p - FOR##_mark)) \ + { \ + return (p - data); \ + } \ + } \ + } \ +} while (0) + + +#define CALLBACK(FOR) \ +do { \ + CALLBACK_NOCLEAR(FOR); \ + FOR##_mark = NULL; \ +} while (0) + + +#define PROXY_CONNECTION "proxy-connection" +#define CONNECTION "connection" +#define CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" +#define UPGRADE "upgrade" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + + +static const char *method_strings[] = + { "DELETE" + , "GET" + , "HEAD" + , "POST" + , "PUT" + , "CONNECT" + , "OPTIONS" + , "TRACE" + , "COPY" + , "LOCK" + , "MKCOL" + , "MOVE" + , "PROPFIND" + , "PROPPATCH" + , "UNLOCK" + , "REPORT" + , "MKACTIVITY" + , "CHECKOUT" + , "MERGE" + }; + + +/* ' ', '_', '-' and all alpha-numeric ascii characters are accepted by acceptable_header. + The 'A'-'Z' are lower-cased. */ +static const char acceptable_header[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + ' ', 0, 0, 0, 0, 0, 0, 0, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 0, 0, 0, 0, 0, '-', 0, 0, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, 0, '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, 0, 0, 0, 0 }; + + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1* + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" | "=" + * | "{" | "}" | SP | HT + */ +static const char tokens[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + ' ', '!', '"', '#', '$', '%', '&', '\'', +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 0, 0, '*', '+', 0, '-', '.', '/', +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, '^', '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, '|', '}', '~', 0 }; + + +static const int8_t unhex[256] = + {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + }; + + +static const uint8_t normal_url_char[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0, 1, 1, 0, 1, 1, 1, 1, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/
+  1, 1, 1, 1, 1, 1, 1, 0,
+/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
+  1, 1, 1, 1, 1, 1, 1, 1,
+/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
+  1, 1, 1, 1, 1, 1, 1, 1,
+/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
+  1, 1, 1, 1, 1, 1, 1, 1,
+/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
+  1, 1, 1, 1, 1, 1, 1, 1,
+/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
+  1, 1, 1, 1, 1, 1, 1, 1,
+/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
+  1, 1, 1, 1, 1, 1, 1, 1,
+/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
+  1, 1, 1, 1, 1, 1, 1, 1,
+/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
+  1, 1, 1, 1, 1, 1, 1, 0 };
+
+/* Parser states. The numeric order matters: PARSING_HEADER() compares against s_headers_almost_done. */
+enum state
+  { s_dead = 1 /* important that this is > 0 */
+
+  , s_start_req_or_res /* initial state when the parser type is neither request nor response */
+  , s_res_or_resp_H
+  , s_start_res /* initial state of a response parser */
+  , s_res_H
+  , s_res_HT
+  , s_res_HTT
+  , s_res_HTTP
+  , s_res_first_http_major
+  , s_res_http_major
+  , s_res_first_http_minor
+  , s_res_http_minor
+  , s_res_first_status_code
+  , s_res_status_code
+  , s_res_status
+  , s_res_line_almost_done
+
+  , s_start_req /* initial state of a request parser */
+
+  , s_req_method
+  , s_req_spaces_before_url
+  , s_req_schema
+  , s_req_schema_slash
+  , s_req_schema_slash_slash
+  , s_req_host
+  , s_req_port
+  , s_req_path
+  , s_req_query_string_start
+  , s_req_query_string
+  , s_req_fragment_start
+  , s_req_fragment
+  , s_req_http_start
+  , s_req_http_H
+  , s_req_http_HT
+  , s_req_http_HTT
+  , s_req_http_HTTP
+  , s_req_first_http_major
+  , s_req_http_major
+  , s_req_first_http_minor
+  , s_req_http_minor
+  , s_req_line_almost_done
+
+  , s_header_field_start
+  , s_header_field
+  , s_header_value_start
+  , s_header_value
+
+  , s_header_almost_done /* end of one header line */
+
+  , s_headers_almost_done /* end of the whole header section */
+  /* Important: 's_headers_almost_done' must be the last 'header' state. All
+   * states beyond this must be 'body' states. It is used for overflow
+   * checking. See the PARSING_HEADER() macro.
+   */
+  , s_chunk_size_start
+  , s_chunk_size
+  , s_chunk_size_almost_done
+  , s_chunk_parameters
+  , s_chunk_data
+  , s_chunk_data_almost_done
+  , s_chunk_data_done
+
+  , s_body_identity /* body of known Content-Length */
+  , s_body_identity_eof /* body that runs until end of input */
+  };
+
+/* True while a header-section state is active and F_TRAILING is not set; uses the local `parser`. */
+#define PARSING_HEADER(state) (state <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING))
+
+/* Progress of matching the current header name, then its value, against the few headers the parser acts on. */
+enum header_states
+  { h_general = 0 /* a header the parser does not interpret */
+  , h_C
+  , h_CO
+  , h_CON
+
+  , h_matching_connection
+  , h_matching_proxy_connection
+  , h_matching_content_length
+  , h_matching_transfer_encoding
+  , h_matching_upgrade
+
+  , h_connection
+  , h_content_length
+  , h_transfer_encoding
+  , h_upgrade
+
+  , h_matching_transfer_encoding_chunked
+  , h_matching_connection_keep_alive
+  , h_matching_connection_close
+
+  , h_transfer_encoding_chunked
+  , h_connection_keep_alive
+  , h_connection_close
+  };
+
+/* Bit flags kept in parser->flags. */
+enum flags
+  { F_CHUNKED = 1 << 0
+  , F_CONNECTION_KEEP_ALIVE = 1 << 1
+  , F_CONNECTION_CLOSE = 1 << 2
+  , F_TRAILING = 1 << 3
+  , F_UPGRADE = 1 << 4
+  , F_SKIPBODY = 1 << 5
+  };
+
+/* Character helpers. LOWER() ORs in 0x20; TOKEN() indexes the tokens[] table. Neither parenthesizes its argument. */
+#define CR '\r'
+#define LF '\n'
+#define LOWER(c) (unsigned char)(c | 0x20)
+#define TOKEN(c) tokens[(unsigned char)c]
+
+/* State in which a new message starts, chosen from the type of the local `parser`. */
+#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
+
+/* In strict builds STRICT_CHECK(cond) jumps to the `error` label when cond is true. */
+#if HTTP_PARSER_STRICT
+# define STRICT_CHECK(cond) if (cond) goto error
+# define NEW_MESSAGE() (http_should_keep_alive(parser) ?
start_state : s_dead) +#else +# define STRICT_CHECK(cond) +# define NEW_MESSAGE() start_state +#endif + + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) +{ + char c, ch; + const char *p = data, *pe; + int64_t to_read; + + enum state state = (enum state) parser->state; + enum header_states header_state = (enum header_states) parser->header_state; + uint64_t index = parser->index; + uint64_t nread = parser->nread; + + if (len == 0) { + if (state == s_body_identity_eof) { + CALLBACK2(message_complete); + } + return 0; + } + + /* technically we could combine all of these (except for url_mark) into one + variable, saving stack space, but it seems more clear to have them + separated. */ + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *fragment_mark = 0; + const char *query_string_mark = 0; + const char *path_mark = 0; + const char *url_mark = 0; + + if (state == s_header_field) + header_field_mark = data; + if (state == s_header_value) + header_value_mark = data; + if (state == s_req_fragment) + fragment_mark = data; + if (state == s_req_query_string) + query_string_mark = data; + if (state == s_req_path) + path_mark = data; + if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash + || state == s_req_schema_slash_slash || state == s_req_port + || state == s_req_query_string_start || state == s_req_query_string + || state == s_req_host + || state == s_req_fragment_start || state == s_req_fragment) + url_mark = data; + + for (p=data, pe=data+len; p != pe; p++) { + ch = *p; + + if (PARSING_HEADER(state)) { + ++nread; + /* Buffer overflow attack */ + if (nread > HTTP_MAX_HEADER_SIZE) goto error; + } + + switch (state) { + + case s_dead: + /* this state is used after a 'Connection: close' message + * the parser will error out if it reads another message + */ + goto error; + + case s_start_req_or_res: + { + if (ch == CR || ch == LF) + 
break; + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + if (ch == 'H') + state = s_res_or_resp_H; + else { + parser->type = HTTP_REQUEST; + goto start_req_method_assign; + } + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + state = s_res_HT; + } else { + if (ch != 'E') goto error; + parser->type = HTTP_REQUEST; + parser->method = HTTP_HEAD; + index = 2; + state = s_req_method; + } + break; + + case s_start_res: + { + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + switch (ch) { + case 'H': + state = s_res_H; + break; + + case CR: + case LF: + break; + + default: + goto error; + } + break; + } + + case s_res_H: + STRICT_CHECK(ch != 'T'); + state = s_res_HT; + break; + + case s_res_HT: + STRICT_CHECK(ch != 'T'); + state = s_res_HTT; + break; + + case s_res_HTT: + STRICT_CHECK(ch != 'P'); + state = s_res_HTTP; + break; + + case s_res_HTTP: + STRICT_CHECK(ch != '/'); + state = s_res_first_http_major; + break; + + case s_res_first_http_major: + if (ch < '1' || ch > '9') goto error; + parser->http_major = ch - '0'; + state = s_res_http_major; + break; + + /* major HTTP version or dot */ + case s_res_http_major: + { + if (ch == '.') { + state = s_res_first_http_minor; + break; + } + + if (ch < '0' || ch > '9') goto error; + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) goto error; + break; + } + + /* first digit of minor HTTP version */ + case s_res_first_http_minor: + if (ch < '0' || ch > '9') goto error; + parser->http_minor = ch - '0'; + state = s_res_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_res_http_minor: + { + if (ch == ' ') { + state = s_res_first_status_code; + break; + } + + if (ch < '0' || ch > '9') goto error; + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) goto error; + break; + } + + case 
s_res_first_status_code: + { + if (ch < '0' || ch > '9') { + if (ch == ' ') { + break; + } + goto error; + } + parser->status_code = ch - '0'; + state = s_res_status_code; + break; + } + + case s_res_status_code: + { + if (ch < '0' || ch > '9') { + switch (ch) { + case ' ': + state = s_res_status; + break; + case CR: + state = s_res_line_almost_done; + break; + case LF: + state = s_header_field_start; + break; + default: + goto error; + } + break; + } + + parser->status_code *= 10; + parser->status_code += ch - '0'; + + if (parser->status_code > 999) goto error; + break; + } + + case s_res_status: + /* the human readable status. e.g. "NOT FOUND" + * we are not humans so just ignore this */ + if (ch == CR) { + state = s_res_line_almost_done; + break; + } + + if (ch == LF) { + state = s_header_field_start; + break; + } + break; + + case s_res_line_almost_done: + STRICT_CHECK(ch != LF); + state = s_header_field_start; + break; + + case s_start_req: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + if (ch < 'A' || 'Z' < ch) goto error; + + start_req_method_assign: + parser->method = (enum http_method) 0; + index = 1; + switch (ch) { + case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; + case 'D': parser->method = HTTP_DELETE; break; + case 'G': parser->method = HTTP_GET; break; + case 'H': parser->method = HTTP_HEAD; break; + case 'L': parser->method = HTTP_LOCK; break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE */ break; + case 'O': parser->method = HTTP_OPTIONS; break; + case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break; + case 'R': parser->method = HTTP_REPORT; break; + case 'T': parser->method = HTTP_TRACE; break; + case 'U': parser->method = HTTP_UNLOCK; break; + default: goto error; + } + state = s_req_method; + break; + } + + case s_req_method: + { + if (ch == '\0') + goto error; + + const char *matcher = 
method_strings[parser->method]; + if (ch == ' ' && matcher[index] == '\0') { + state = s_req_spaces_before_url; + } else if (ch == matcher[index]) { + ; /* nada */ + } else if (parser->method == HTTP_CONNECT) { + if (index == 1 && ch == 'H') { + parser->method = HTTP_CHECKOUT; + } else if (index == 2 && ch == 'P') { + parser->method = HTTP_COPY; + } + } else if (parser->method == HTTP_MKCOL) { + if (index == 1 && ch == 'O') { + parser->method = HTTP_MOVE; + } else if (index == 1 && ch == 'E') { + parser->method = HTTP_MERGE; + } else if (index == 2 && ch == 'A') { + parser->method = HTTP_MKACTIVITY; + } + } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') { + parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ + } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') { + parser->method = HTTP_PUT; + } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { + parser->method = HTTP_PROPPATCH; + } else { + goto error; + } + + ++index; + break; + } + case s_req_spaces_before_url: + { + if (ch == ' ') break; + + if (ch == '/') { + MARK(url); + MARK(path); + state = s_req_path; + break; + } + + c = LOWER(ch); + + if (c >= 'a' && c <= 'z') { + MARK(url); + state = s_req_schema; + break; + } + + goto error; + } + + case s_req_schema: + { + c = LOWER(ch); + + if (c >= 'a' && c <= 'z') break; + if (c >= '0' && c <= '9') break; + if (c == '-') break; + + if (ch == ':') { + state = s_req_schema_slash; + break; + } else if (ch == '.') { + state = s_req_host; + break; + } + + goto error; + } + + case s_req_schema_slash: + STRICT_CHECK(ch != '/'); + state = s_req_schema_slash_slash; + break; + + case s_req_schema_slash_slash: + STRICT_CHECK(ch != '/'); + state = s_req_host; + break; + + case s_req_host: + { + c = LOWER(ch); + if (c >= 'a' && c <= 'z') break; + if ((ch >= '0' && ch <= '9') || ch == '.' 
|| ch == '-') break; + switch (ch) { + case ':': + state = s_req_port; + break; + case '/': + MARK(path); + state = s_req_path; + break; + case ' ': + /* The request line looks like: + * "GET http://foo.bar.com HTTP/1.1" + * That is, there is no path. + */ + CALLBACK(url); + state = s_req_http_start; + break; + default: + goto error; + } + break; + } + + case s_req_port: + { + if (ch >= '0' && ch <= '9') break; + switch (ch) { + case '/': + MARK(path); + state = s_req_path; + break; + case ' ': + /* The request line looks like: + * "GET http://foo.bar.com:1234 HTTP/1.1" + * That is, there is no path. + */ + CALLBACK(url); + state = s_req_http_start; + break; + default: + goto error; + } + break; + } + + case s_req_path: + { + if (normal_url_char[(unsigned char)ch]) break; + + switch (ch) { + case ' ': + CALLBACK(url); + CALLBACK(path); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + CALLBACK(path); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + CALLBACK(path); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + CALLBACK(path); + state = s_req_query_string_start; + break; + case '#': + CALLBACK(path); + state = s_req_fragment_start; + break; + default: + goto error; + } + break; + } + + case s_req_query_string_start: + { + if (normal_url_char[(unsigned char)ch]) { + MARK(query_string); + state = s_req_query_string; + break; + } + + switch (ch) { + case '?': + break; /* XXX ignore extra '?' ... is this right? 
*/ + case ' ': + CALLBACK(url); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '#': + state = s_req_fragment_start; + break; + default: + goto error; + } + break; + } + + case s_req_query_string: + { + if (normal_url_char[(unsigned char)ch]) break; + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + break; + case ' ': + CALLBACK(url); + CALLBACK(query_string); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + CALLBACK(query_string); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + CALLBACK(query_string); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '#': + CALLBACK(query_string); + state = s_req_fragment_start; + break; + default: + goto error; + } + break; + } + + case s_req_fragment_start: + { + if (normal_url_char[(unsigned char)ch]) { + MARK(fragment); + state = s_req_fragment; + break; + } + + switch (ch) { + case ' ': + CALLBACK(url); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + MARK(fragment); + state = s_req_fragment; + break; + case '#': + break; + default: + goto error; + } + break; + } + + case s_req_fragment: + { + if (normal_url_char[(unsigned char)ch]) break; + + switch (ch) { + case ' ': + CALLBACK(url); + CALLBACK(fragment); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + CALLBACK(fragment); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + CALLBACK(fragment); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + case '#': + break; + default: + goto error; + } + 
break; + } + + case s_req_http_start: + switch (ch) { + case 'H': + state = s_req_http_H; + break; + case ' ': + break; + default: + goto error; + } + break; + + case s_req_http_H: + STRICT_CHECK(ch != 'T'); + state = s_req_http_HT; + break; + + case s_req_http_HT: + STRICT_CHECK(ch != 'T'); + state = s_req_http_HTT; + break; + + case s_req_http_HTT: + STRICT_CHECK(ch != 'P'); + state = s_req_http_HTTP; + break; + + case s_req_http_HTTP: + STRICT_CHECK(ch != '/'); + state = s_req_first_http_major; + break; + + /* first digit of major HTTP version */ + case s_req_first_http_major: + if (ch < '1' || ch > '9') goto error; + parser->http_major = ch - '0'; + state = s_req_http_major; + break; + + /* major HTTP version or dot */ + case s_req_http_major: + { + if (ch == '.') { + state = s_req_first_http_minor; + break; + } + + if (ch < '0' || ch > '9') goto error; + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) goto error; + break; + } + + /* first digit of minor HTTP version */ + case s_req_first_http_minor: + if (ch < '0' || ch > '9') goto error; + parser->http_minor = ch - '0'; + state = s_req_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_req_http_minor: + { + if (ch == CR) { + state = s_req_line_almost_done; + break; + } + + if (ch == LF) { + state = s_header_field_start; + break; + } + + /* XXX allow spaces after digit? 
*/ + + if (ch < '0' || ch > '9') goto error; + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) goto error; + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (ch != LF) goto error; + state = s_header_field_start; + break; + } + + case s_header_field_start: + { + if (ch == CR) { + state = s_headers_almost_done; + break; + } + + if (ch == LF) { + /* they might be just sending \n instead of \r\n so this would be + * the second \n to denote the end of headers*/ + state = s_headers_almost_done; + goto headers_almost_done; + } + + c = TOKEN(ch); + + if (!c) goto error; + + MARK(header_field); + + index = 0; + state = s_header_field; + + switch (c) { + case 'c': + header_state = h_C; + break; + + case 'p': + header_state = h_matching_proxy_connection; + break; + + case 't': + header_state = h_matching_transfer_encoding; + break; + + case 'u': + header_state = h_matching_upgrade; + break; + + default: + header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + c = TOKEN(ch); + + if (c) { + switch (header_state) { + case h_general: + break; + + case h_C: + index++; + header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + index++; + header_state = (c == 'n' ? 
h_CON : h_general); + break; + + case h_CON: + index++; + switch (c) { + case 'n': + header_state = h_matching_connection; + break; + case 't': + header_state = h_matching_content_length; + break; + default: + header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + index++; + if (index > sizeof(CONNECTION)-1 + || c != CONNECTION[index]) { + header_state = h_general; + } else if (index == sizeof(CONNECTION)-2) { + header_state = h_connection; + } + break; + + /* proxy-connection */ + + case h_matching_proxy_connection: + index++; + if (index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[index]) { + header_state = h_general; + } else if (index == sizeof(PROXY_CONNECTION)-2) { + header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + index++; + if (index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[index]) { + header_state = h_general; + } else if (index == sizeof(CONTENT_LENGTH)-2) { + header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + index++; + if (index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[index]) { + header_state = h_general; + } else if (index == sizeof(TRANSFER_ENCODING)-2) { + header_state = h_transfer_encoding; + } + break; + + /* upgrade */ + + case h_matching_upgrade: + index++; + if (index > sizeof(UPGRADE)-1 + || c != UPGRADE[index]) { + header_state = h_general; + } else if (index == sizeof(UPGRADE)-2) { + header_state = h_upgrade; + } + break; + + case h_connection: + case h_content_length: + case h_transfer_encoding: + case h_upgrade: + if (ch != ' ') header_state = h_general; + break; + + default: + assert(0 && "Unknown header_state"); + break; + } + break; + } + + if (ch == ':') { + CALLBACK(header_field); + state = s_header_value_start; + break; + } + + if (ch == CR) { + state = s_header_almost_done; + CALLBACK(header_field); + break; + } + + if (ch == 
LF) { + CALLBACK(header_field); + state = s_header_field_start; + break; + } + + goto error; + } + + case s_header_value_start: + { + if (ch == ' ') break; + + MARK(header_value); + + state = s_header_value; + index = 0; + + c = acceptable_header[(unsigned char)ch]; + + if (!c) { + if (ch == CR) { + CALLBACK(header_value); + header_state = h_general; + state = s_header_almost_done; + break; + } + + if (ch == LF) { + CALLBACK(header_value); + state = s_header_field_start; + break; + } + + header_state = h_general; + break; + } + + switch (header_state) { + case h_upgrade: + parser->flags |= F_UPGRADE; + header_state = h_general; + break; + + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + header_state = h_matching_transfer_encoding_chunked; + } else { + header_state = h_general; + } + break; + + case h_content_length: + if (ch < '0' || ch > '9') goto error; + parser->content_length = ch - '0'; + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + header_state = h_matching_connection_close; + } else { + header_state = h_general; + } + break; + + default: + header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + c = acceptable_header[(unsigned char)ch]; + + if (!c) { + if (ch == CR) { + CALLBACK(header_value); + state = s_header_almost_done; + break; + } + + if (ch == LF) { + CALLBACK(header_value); + goto header_almost_done; + } + break; + } + + switch (header_state) { + case h_general: + break; + + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; + + case h_content_length: + if (ch == ' ') break; + if (ch < '0' || ch > '9') goto error; + parser->content_length *= 10; + parser->content_length += ch - '0'; + break; + + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_chunked: 
+ index++; + if (index > sizeof(CHUNKED)-1 + || c != CHUNKED[index]) { + header_state = h_general; + } else if (index == sizeof(CHUNKED)-2) { + header_state = h_transfer_encoding_chunked; + } + break; + + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + index++; + if (index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[index]) { + header_state = h_general; + } else if (index == sizeof(KEEP_ALIVE)-2) { + header_state = h_connection_keep_alive; + } + break; + + /* looking for 'Connection: close' */ + case h_matching_connection_close: + index++; + if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) { + header_state = h_general; + } else if (index == sizeof(CLOSE)-2) { + header_state = h_connection_close; + } + break; + + case h_transfer_encoding_chunked: + case h_connection_keep_alive: + case h_connection_close: + if (ch != ' ') header_state = h_general; + break; + + default: + state = s_header_value; + header_state = h_general; + break; + } + break; + } + + case s_header_almost_done: + header_almost_done: + { + STRICT_CHECK(ch != LF); + + state = s_header_field_start; + + switch (header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } + break; + } + + case s_headers_almost_done: + headers_almost_done: + { + STRICT_CHECK(ch != LF); + + if (parser->flags & F_TRAILING) { + /* End of a chunked request */ + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + break; + } + + nread = 0; + + if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) { + parser->upgrade = 1; + } + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. 
This + * is needed for the annoying case of recieving a response to a HEAD + * request. + */ + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(parser)) { + case 0: + break; + + case 1: + parser->flags |= F_SKIPBODY; + break; + + default: + return p - data; /* Error */ + } + } + + /* Exit, the rest of the connect is in a different protocol. */ + if (parser->upgrade) { + CALLBACK2(message_complete); + return (p - data); + } + + if (parser->flags & F_SKIPBODY) { + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header */ + state = s_chunk_size_start; + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } else if (parser->content_length > 0) { + /* Content-Length header given and non-zero */ + state = s_body_identity; + } else { + if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) { + /* Assume content-length 0 - read the next */ + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } else { + /* Read body until EOF */ + state = s_body_identity_eof; + } + } + } + + break; + } + + case s_body_identity: + to_read = MIN(pe - p, (int64_t)parser->content_length); + if (to_read > 0) { + if (settings->on_body) settings->on_body(parser, p, to_read); + p += to_read - 1; + parser->content_length -= to_read; + if (parser->content_length == 0) { + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } + } + break; + + /* read until EOF */ + case s_body_identity_eof: + to_read = pe - p; + if (to_read > 0) { + if (settings->on_body) settings->on_body(parser, p, to_read); + p += to_read - 1; + } + break; + + case s_chunk_size_start: + { + assert(parser->flags & F_CHUNKED); + + c = unhex[(unsigned char)ch]; + if (c == -1) goto error; + parser->content_length = c; + state = s_chunk_size; + break; + } + + case s_chunk_size: 
+ { + assert(parser->flags & F_CHUNKED); + + if (ch == CR) { + state = s_chunk_size_almost_done; + break; + } + + c = unhex[(unsigned char)ch]; + + if (c == -1) { + if (ch == ';' || ch == ' ') { + state = s_chunk_parameters; + break; + } + goto error; + } + + parser->content_length *= 16; + parser->content_length += c; + break; + } + + case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); + /* just ignore this shit. TODO check for overflow */ + if (ch == CR) { + state = s_chunk_size_almost_done; + break; + } + break; + } + + case s_chunk_size_almost_done: + { + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + + if (parser->content_length == 0) { + parser->flags |= F_TRAILING; + state = s_header_field_start; + } else { + state = s_chunk_data; + } + break; + } + + case s_chunk_data: + { + assert(parser->flags & F_CHUNKED); + + to_read = MIN(pe - p, (int64_t)(parser->content_length)); + + if (to_read > 0) { + if (settings->on_body) settings->on_body(parser, p, to_read); + p += to_read - 1; + } + + if (to_read == parser->content_length) { + state = s_chunk_data_almost_done; + } + + parser->content_length -= to_read; + break; + } + + case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != CR); + state = s_chunk_data_done; + break; + + case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + state = s_chunk_size_start; + break; + + default: + assert(0 && "unhandled state"); + goto error; + } + } + + CALLBACK_NOCLEAR(header_field); + CALLBACK_NOCLEAR(header_value); + CALLBACK_NOCLEAR(fragment); + CALLBACK_NOCLEAR(query_string); + CALLBACK_NOCLEAR(path); + CALLBACK_NOCLEAR(url); + + parser->state = state; + parser->header_state = header_state; + parser->index = index; + parser->nread = nread; + + return len; + +error: + parser->state = s_dead; + return (p - data); +} + + +int +http_should_keep_alive (http_parser *parser) +{ + if (parser->http_major > 0 && parser->http_minor > 0) { + 
/* HTTP/1.1 (any version with major > 0 and minor > 0): keep alive unless the close flag is set */
+    if (parser->flags & F_CONNECTION_CLOSE) {
+      return 0;
+    } else {
+      return 1;
+    }
+  } else {
+    /* HTTP/1.0 or earlier: keep alive only when the keep-alive flag is set */
+    if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
+      return 1;
+    } else {
+      return 0;
+    }
+  }
+}
+
+/* Returns method_strings[m]; m is used as an index without a range check. */
+const char * http_method_str (enum http_method m)
+{
+  return method_strings[m];
+}
+
+/* Sets type, the start state for that type, and zeroes nread, upgrade, flags and method; no other field is assigned here. */
+void
+http_parser_init (http_parser *parser, enum http_parser_type t)
+{
+  parser->type = t;
+  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
+  parser->nread = 0;
+  parser->upgrade = 0;
+  parser->flags = 0;
+  parser->method = 0;
+}
diff --git a/http_parser.h b/http_parser.h
new file mode 100644
index 0000000..0a76550
--- /dev/null
+++ b/http_parser.h
@@ -0,0 +1,176 @@
+/* Copyright 2009,2010 Ryan Dahl
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */ +#ifndef http_parser_h +#define http_parser_h +#ifdef __cplusplus +extern "C" { +#endif + + +#include +#ifdef _WIN32 + typedef __int8 int8_t; + typedef unsigned __int8 uint8_t; + typedef __int16 int16_t; + typedef unsigned __int16 uint16_t; + typedef __int32 int32_t; + typedef unsigned __int32 uint32_t; + typedef __int64 int64_t; + typedef unsigned __int64 uint64_t; + + typedef unsigned int size_t; + typedef int ssize_t; +#else +#include +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#else +# define HTTP_PARSER_STRICT 0 +#endif + + +/* Maximium header size allowed */ +#define HTTP_MAX_HEADER_SIZE (80*1024) + + + typedef struct http_parser http_parser; + typedef struct http_parser_settings http_parser_settings; + + +/* Callbacks should return non-zero to indicate an error. The parser will + * then halt execution. + * + * The one exception is on_headers_complete. In a HTTP_RESPONSE parser + * returning '1' from on_headers_complete will tell the parser that it + * should not expect a body. This is used when receiving a response to a + * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: + * chunked' headers that indicate the presence of a body. + * + * http_data_cb does not return data chunks. It will be call arbitrarally + * many times for each string. E.G. you might get 10 callbacks for "on_path" + * each providing just a few characters more data. 
+ */
+  typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
+  typedef int (*http_cb) (http_parser*);
+
+
+/* Request Methods */
+  enum http_method
+  { HTTP_DELETE = 0
+  , HTTP_GET
+  , HTTP_HEAD
+  , HTTP_POST
+  , HTTP_PUT
+  /* pathological */
+  , HTTP_CONNECT
+  , HTTP_OPTIONS
+  , HTTP_TRACE
+  /* webdav */
+  , HTTP_COPY
+  , HTTP_LOCK
+  , HTTP_MKCOL
+  , HTTP_MOVE
+  , HTTP_PROPFIND
+  , HTTP_PROPPATCH
+  , HTTP_UNLOCK
+  /* subversion */
+  , HTTP_REPORT
+  , HTTP_MKACTIVITY
+  , HTTP_CHECKOUT
+  , HTTP_MERGE
+  };
+
+/* Kind of message a parser accepts; passed to http_parser_init(). */
+  enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };
+
+/* Parser state for one connection. */
+  struct http_parser {
+    /** PRIVATE **/
+    unsigned char type : 2;
+    unsigned char flags : 6;
+    unsigned char state;
+    unsigned char header_state;
+    unsigned char index;
+
+    uint32_t nread;
+    int64_t content_length;
+
+    /** READ-ONLY **/
+    unsigned short http_major;
+    unsigned short http_minor;
+    unsigned short status_code; /* responses only */
+    unsigned char method; /* requests only */
+
+    /* 1 = Upgrade header was present and the parser has exited because of that.
+     * 0 = No upgrade header present.
+     * Should be checked when http_parser_execute() returns in addition to
+     * error checking.
+     */
+    char upgrade;
+
+    /** PUBLIC **/
+    void *data; /* A pointer to get hook to the "connection" or "socket" object */
+  };
+
+/* Callback table passed to http_parser_execute(). */
+  struct http_parser_settings {
+    http_cb on_message_begin;
+    http_data_cb on_path;
+    http_data_cb on_query_string;
+    http_data_cb on_url;
+    http_data_cb on_fragment;
+    http_data_cb on_header_field;
+    http_data_cb on_header_value;
+    http_cb on_headers_complete;
+    http_data_cb on_body;
+    http_cb on_message_complete;
+  };
+
+/* Prepares `parser` to parse messages of the given type. */
+  void http_parser_init(http_parser *parser, enum http_parser_type type);
+
+/* Parses `len` bytes at `data`, invoking the callbacks in `settings`. */
+  size_t http_parser_execute(http_parser *parser,
+                             const http_parser_settings *settings,
+                             const char *data,
+                             size_t len);
+
+
+/* If http_should_keep_alive() in the on_headers_complete or
+ * on_message_complete callback returns 0 (false), then this should be
+ * the last message on the connection.
+ * If you are the server, respond with the "Connection: close" header.
+ * If you are the client, close the connection.
+ */
+  int http_should_keep_alive(http_parser *parser);
+
+/* Returns a string version of the HTTP method. */
+  const char *http_method_str(enum http_method);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..b17c904
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,263 @@
+/*========================================================
+** University of Illinois/NCSA
+** Open Source License
+**
+** Copyright (C) 2011,The Board of Trustees of the University of
+** Illinois. All rights reserved.
+** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
+**==========================================================
+*/
+
+// NOTE(review): the header names on the next seven #include lines were
+// missing from the reviewed copy of this patch. They are reconstructed from
+// the symbols used below (cout/cerr, queue, assert, pthread_*, signal,
+// exit/srand, getopt) -- confirm against the original file.
+#include <iostream>
+#include <queue>
+
+#include <assert.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "MyServerSocket.h"
+#include "HTTPRequest.h"
+#include "Cache.h"
+#include "dbg.h"
+#include "time.h"
+
+using namespace std;
+
+// Ports the proxy listens on; main() starts one server thread per entry.
+int serverPorts[] = {8808, 8809, 8810};
+#define NUM_SERVERS (sizeof(serverPorts) / sizeof(serverPorts[0]))
+
+// Written back to the client to acknowledge a CONNECT request, before the
+// client socket is switched to SSL server mode.
+static string CONNECT_REPLY = "HTTP/1.1 200 Connection Established\r\n\r\n";
+
+// Guards numThreads and every server thread's kill queue.
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+// Number of client threads currently inside run_client().
+static unsigned long numThreads = 0;
+
+// Arguments for client_thread(). Allocated by start_client() and deleted by
+// client_thread() once the fields have been copied out.
+struct client_struct {
+    MySocket *sock;
+    int serverPort;
+    // NOTE(review): the element type was missing from the reviewed copy;
+    // MySocket * is inferred from push(sock) / delete front() below.
+    queue<MySocket *> *killQueue;
+};
+
+// Arguments for server_thread(). Allocated by start_server() and deleted by
+// server_thread().
+struct server_struct {
+    int serverPort;
+};
+
+// Thread ids of the server threads; main() joins them.
+pthread_t server_threads[NUM_SERVERS];
+// Set to 1 by the "-v" option. Selects getHTTPResponseVote() over
+// getHTTPResponseNoVote() in run_client().
+static int gVOTING = 0;
+
+/*
+ * Serves one client connection.
+ *
+ * Reads a request from `sock`. For a CONNECT request it writes CONNECT_REPLY,
+ * switches `sock` to SSL server mode and reads a second request from it.
+ * Unless an error occurred, the request is then handed to the cache, which
+ * is given `sock` to send the response on.
+ *
+ * `sock` is closed before returning but not deleted; the caller keeps
+ * ownership of the object.
+ */
+void run_client(MySocket *sock, int serverPort)
+{
+    HTTPRequest *request = new HTTPRequest(sock, serverPort);
+
+    if(!request->readRequest()) {
+        cout << "did not read request" << endl;
+    } else {
+        bool error = false;
+        bool isSSL = false;
+
+        // NOTE(review): on the CONNECT path host and url keep the values
+        // taken from the CONNECT request, not from the request read
+        // afterwards -- confirm this is intended.
+        string host = request->getHost();
+        string url = request->getUrl();
+
+        MySocket *replySock = NULL;
+
+        if(request->isConnect()) {
+            if(!sock->write_bytes(CONNECT_REPLY)) {
+                error = true;
+            } else {
+                delete request;
+                replySock = cache()->getReplySocket(host, true);
+                // need proxy <--> remotesite socket for information needed
+                // to fake a certificate
+                sock->enableSSLServer(replySock);
+                isSSL = true;
+                // Read the request that follows the CONNECT on the same
+                // client socket.
+                request = new HTTPRequest(sock, serverPort);
+                if(!request->readRequest()) {
+                    error = true;
+                }
+            }
+        } else {
+            replySock = cache()->getReplySocket(host, false);
+        }
+
+        if(!error) {
+            string req = request->getRequest();
+            if(gVOTING == 0) {
+                cache()->getHTTPResponseNoVote(host, req, url, serverPort, sock, isSSL, replySock);
+            } else {
+                cache()->getHTTPResponseVote(host, req, url, serverPort, sock, isSSL, replySock);
+            }
+        }
+    }
+
+    sock->close();
+    delete request;
+}
+
+/*
+ * pthread entry point for one client connection.
+ *
+ * `arg` is a heap-allocated client_struct, which is deleted here. Runs
+ * run_client() and then pushes the socket onto the owning server thread's
+ * kill queue. Always returns NULL.
+ */
+void *client_thread(void *arg)
+{
+    struct client_struct *cs = static_cast<struct client_struct *>(arg);
+    MySocket *sock = cs->sock;
+    int serverPort = cs->serverPort;
+    queue<MySocket *> *killQueue = cs->killQueue;
+
+    delete cs;
+
+    pthread_mutex_lock(&mutex);
+    numThreads++;
+    pthread_mutex_unlock(&mutex);
+
+    run_client(sock, serverPort);
+
+    pthread_mutex_lock(&mutex);
+    numThreads--;
+
+    // This is a hack because linux is having trouble freeing memory
+    // in a different thread, so instead we will let the server thread
+    // free this memory
+    killQueue->push(sock);
+    pthread_mutex_unlock(&mutex);
+
+    return NULL;
+}
+
+/*
+ * Starts a detached thread running client_thread() for `sock`.
+ *
+ * `killQueue` is the queue the client thread pushes `sock` onto when it is
+ * done, so that the server thread can delete it.
+ */
+void start_client(MySocket *sock, int serverPort, queue<MySocket *> *killQueue)
+{
+    struct client_struct *cs = new struct client_struct;
+    cs->sock = sock;
+    cs->serverPort = serverPort;
+    cs->killQueue = killQueue;
+
+    pthread_t tid;
+    int ret = pthread_create(&tid, NULL, client_thread, cs);
+    assert(ret == 0);
+    ret = pthread_detach(tid);
+    assert(ret == 0);
+}
+
+/*
+ * pthread entry point for one listening port.
+ *
+ * `arg` is a heap-allocated server_struct, which is deleted here. Accepts
+ * connections forever and starts one client thread per connection. Before
+ * each new client is started, the sockets that finished client threads left
+ * on the kill queue are deleted. The process exits with status 1 if accept()
+ * throws.
+ */
+void *server_thread(void *arg)
+{
+    struct server_struct *ss = static_cast<struct server_struct *>(arg);
+    int port = ss->serverPort;
+    delete ss;
+
+    MyServerSocket server(port);
+    queue<MySocket *> killQueue;
+    while(true) {
+        MySocket *client = NULL;
+        try {
+            client = server.accept();
+        } catch(MySocketException &e) {
+            // Caught by reference so the exception object is not copied.
+            cerr << e.toString() << endl;
+            exit(1);
+        }
+
+        pthread_mutex_lock(&mutex);
+        while(!killQueue.empty()) {
+            delete killQueue.front();
+            killQueue.pop();
+        }
+        pthread_mutex_unlock(&mutex);
+
+        start_client(client, port, &killQueue);
+    }
+    return NULL;
+}
+
+/*
+ * Starts a server thread for `port` and returns its thread id. The thread is
+ * not detached; main() joins it.
+ */
+pthread_t start_server(int port)
+{
+    cerr << "starting server on port " << port << endl;
+    server_struct *ss = new struct server_struct;
+    ss->serverPort = port;
+    pthread_t tid;
+    int ret = pthread_create(&tid, NULL, server_thread, ss);
+    assert(ret == 0);
+    return tid;
+}
+
+/*
+ * Parses the command line. "-v" sets gVOTING to 1; any other option prints
+ * an error and exits with status 1.
+ */
+static void get_opts(int argc, char *argv[])
+{
+    int c;
+    // getopt() is documented to return -1 (not EOF) when options run out.
+    while((c = getopt(argc, argv, "v")) != -1) {
+        switch(c) {
+            case 'v':
+                gVOTING = 1;
+                break;
+            default:
+                cerr << "Wrong Argument." << endl;
+                exit(1);
+        }
+    }
+}
+
+/*
+ * Program entry point: starts one server thread per entry in serverPorts and
+ * waits for all of them.
+ */
+int main(int argc, char *argv[])
+{
+    // if started with "-v" option, voting will be enabled. Otherwise, just a
+    // plain proxy
+    get_opts(argc, argv);
+
+    // get socket write errors from write call
+    signal(SIGPIPE, SIG_IGN);
+
+    // initialize ssl library
+    SSL_load_error_strings();
+    SSL_library_init();
+
+    cout << "number of servers: " << NUM_SERVERS << endl;
+
+    // when generating serial number for X509, need random number
+    srand(time(NULL));
+    Cache::setNumBrowsers(NUM_SERVERS);
+
+    for(unsigned int idx = 0; idx < NUM_SERVERS; idx++) {
+        server_threads[idx] = start_server(serverPorts[idx]);
+    }
+
+    for(unsigned int idx = 0; idx < NUM_SERVERS; idx++) {
+        int ret = pthread_join(server_threads[idx], NULL);
+        assert(ret == 0);
+    }
+    return 0;
+}