diff --git a/Cache.cpp b/Cache.cpp new file mode 100644 index 0000000..d31ea5c --- /dev/null +++ b/Cache.cpp @@ -0,0 +1,376 @@ +/*======================================================== +** University of Illinois/NCSA +** Open Source License +** +** Copyright (C) 2011,The Board of Trustees of the University of +** Illinois. All rights reserved. +** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
+**==========================================================
+*/
+
+#include "Cache.h"
+
+// NOTE(review): the header names of the system includes were lost when this
+// patch was extracted; the list below is what the code in this file needs.
+// Confirm against the original commit.
+#include <assert.h>
+#include <stdio.h>
+
+#include <pthread.h>
+#include <iostream>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "CacheEntry.h"
+#include "dbg.h"
+
+using namespace std;
+
+static Cache globalCache;
+int Cache::num_browsers = 0;
+
+static string reply404 = "HTTP/1.1 404 Not Found\r\nServer: twproxy\r\nConnection: close\r\nContent-Length: 0\r\n\r\n";
+
+
+extern int serverPorts[];
+
+// Prints every per-browser request counter followed by the url, e.g.
+// "1 0 0 http://example.com/". Iterates over the whole vector instead of
+// indexing v[0..2], which read out of bounds with fewer than three browsers.
+static void printReqVec(const vector<int> &v, const string &url)
+{
+    for(size_t idx = 0; idx < v.size(); idx++) {
+        printf("%d ", v[idx]);
+    }
+    printf("%s\n", url.c_str());
+}
+
+// Opens a connection to "hostname:port" (optionally upgrading it to an SSL
+// client connection) and returns the new socket, or NULL when the host string
+// is malformed or the connection/SSL setup fails. The caller owns the result.
+MySocket *Cache::getReplySocket(string host, bool isSSL)
+{
+    // host comes from the parsed request, so reject bad input instead of asserting
+    string::size_type colon = host.find(':');
+    if((colon == string::npos) || (colon >= (host.length()-1))) {
+        cout << "malformed host " << host << endl;
+        return NULL;
+    }
+    string portStr = host.substr(colon+1);
+    string hostStr = host.substr(0, colon);
+    int port = 0;
+    int ret = sscanf(portStr.c_str(), "%d", &port);
+    if((ret != 1) || (port <= 0)) {
+        cout << "malformed port in " << host << endl;
+        return NULL;
+    }
+    MySocket *replySock = NULL;
+    try {
+        //cout << "making connection to " << hostStr << ":" << port << endl;
+        replySock = new MySocket(hostStr.c_str(), port);
+        if(isSSL) {
+            replySock->enableSSLClient();
+        }
+    } catch(const char *e) {
+        // const char * also catches a thrown char *
+        cout << e << endl;
+        // the socket may exist but be unusable (e.g. SSL setup threw)
+        delete replySock;
+        replySock = NULL;
+    } catch(...) {
+        cout << "could not connect to " << hostStr << ":" << port << endl;
+        delete replySock;
+        replySock = NULL;
+    }
+    return replySock;
+}
+
+// Looks up the cache entry stored for url; returns NULL when there is none.
+// Must be called with cache_mutex held.
+//XXX: should check url, method, cookie, possible even port
+CacheEntry *Cache::find(string url, string /*request*/) {
+    map<string, CacheEntry *>::iterator i = m_store.find(url);
+    if(i == m_store.end())
+        return NULL;
+    return i->second;
+}
+
+// Registers ent under url. map::insert keeps an already-present entry.
+// Must be called with cache_mutex held.
+void Cache::addToStore(string url, CacheEntry *ent) {
+    assert(ent != NULL);
+    m_store.insert(make_pair(url, ent));
+}
+
+// Voting state machine for one browser request. Must be called with
+// cache_mutex held; the mutex is released only around the network fetch and
+// while waiting on cache_cond.
+//   no entry       -> create it (CACHE_NEW) and wait until it is CACHE_IN
+//   CACHE_NEW      -> second request for the url: fetch it, mark CACHE_IN
+//   CACHE_FETCHING -> wait until the fetcher marks it CACHE_IN
+//   CACHE_IN       -> answer from the stored response
+// Takes ownership of replySock: fetch() deletes it on the fetching path and
+// the other paths delete it here because they never use it.
+// NOTE(review): ownership is inferred from fetch() and getHTTPResponseNoVote()
+// both deleting the socket; confirm the caller does not use it afterwards.
+// Always returns 0.
+int Cache::votingFetchInsertWriteback(string url, string request, int browserId, MySocket *browserSock, string host, bool isSSL, MySocket *replySock)
+{
+    assert(browserId >= 0);
+    //find() will take care of all checks, including same url, same method, different
+    //cookie for the same browser or different browser
+    CacheEntry *ent = find(url, request);
+    if(ent == NULL) {
+        //first request of this url, wait till someone votes and fetches, then write
+        //back to the browser, or keep waiting forever
+        cache_dbg("browser: %d ,%s no exist\n", browserId, url.c_str());
+        ent = new CacheEntry(num_browsers, url, request);
+        ent->setReqState(CACHE_NEW);
+        cache_dbg("browser: %d ,%s SET to cache_new\n", browserId, url.c_str());
+        int ret = ent->updateReqVec(browserId);
+
+        printReqVec(ent->getReqVec(), url);
+
+        assert(ret == 1);
+        (void) ret;
+        addToStore(url, ent);
+
+        // this thread never fetches, so its upstream socket is not needed
+        delete replySock;
+        replySock = NULL;
+
+        while(ent->getReqState() != CACHE_IN) {
+            cache_dbg("CACHE_NEW cache before WAIT browser %d %s\n", browserId, url.c_str());
+            pthread_cond_wait(&cache_cond, &cache_mutex);
+            cache_dbg("CACHE_NEW cache AFTER WAIT browser %d %s\n", browserId, url.c_str());
+        }
+
+        printf("%d waked up\n", browserId);
+        printReqVec(ent->getReqVec(), url);
+
+        sendBrowser(browserSock, ent, browserId);
+    }
+    else if(ent->getReqState() == CACHE_IN) {
+        //this request is fetched
+        cache_dbg("browser: %d ,%s is CACHE_IN\n", browserId, url.c_str());
+        //opera is really making two same requests to http://google.com, no difference,
+        //so the counter is not asserted to be 1 here
+        ent->updateReqVec(browserId);
+
+        printReqVec(ent->getReqVec(), url);
+
+        delete replySock;
+        replySock = NULL;
+
+        sendBrowser(browserSock, ent, browserId);
+    }
+    else if(ent->getReqState() == CACHE_FETCHING) {
+        //somebody is fetching the request, wait till done
+        cache_dbg("browser: %d ,%s is CACHE_FETCHING\n", browserId, url.c_str());
+        ent->updateReqVec(browserId);
+
+        printf("fetching\n");
+        printReqVec(ent->getReqVec(), url);
+
+        delete replySock;
+        replySock = NULL;
+
+        while(ent->getReqState() != CACHE_IN) {
+            cache_dbg("CACHE_FETCHING cache before WAIT browser %d %s\n", browserId, url.c_str());
+            pthread_cond_wait(&cache_cond, &cache_mutex);
+            cache_dbg("CACHE_FETCHING cache AFTER WAIT browser %d %s\n", browserId, url.c_str());
+        }
+
+        printf("%d waked up\n", browserId);
+        printReqVec(ent->getReqVec(), url);
+
+        sendBrowser(browserSock, ent, browserId);
+    }
+    else if(ent->getReqState() == CACHE_NEW) {
+        //vote for some previous request from someone, and FETCH
+        //it won't be my own old request, find() is going to take care of that
+        ent->updateReqVec(browserId);
+        printReqVec(ent->getReqVec(), url);
+        cache_dbg("browser: %d ,%s SET to cache_fetching\n", browserId, url.c_str());
+        ent->setReqState(CACHE_FETCHING);
+        // drop the lock for the duration of the network fetch
+        cache_dbg("cache UNlock browser %d %s\n", browserId, url.c_str());
+        pthread_mutex_unlock(&cache_mutex);
+        fetch(ent, host, isSSL, browserId, replySock);
+        pthread_mutex_lock(&cache_mutex);
+        cache_dbg("cache lock browser %d %s\n", browserId, url.c_str());
+        cache_dbg("browser: %d ,%s SET to cache_in\n", browserId, url.c_str());
+        // waiters are woken by the broadcast in getHTTPResponseVote()
+        ent->setReqState(CACHE_IN);
+        sendBrowser(browserSock, ent, browserId);
+    }
+    else
+        assert(false);
+    return 0;
+}
+
+// Writes the stored response of ent to browserSock and records the delivery
+// in the entry's response vector. Must be called with cache_mutex held.
+// Returns 0 when the write succeeded and -1 when it failed.
+int Cache::sendBrowser(MySocket *browserSock, CacheEntry *ent, int browserId) {
+    // getResponse() returns by value, so take a single copy
+    const string response = ent->getResponse();
+    cache_dbg("sendBrowser send to browser %d, response length %zu\n", browserId, response.length());
+    bool ok = browserSock->write_bytes(response.c_str(), response.length());
+
+    ent->updateRespVec(browserId);
+    return ok ? 0 : -1;
+}
+
+// Entry point for voting mode: derives the browser id from the listening port
+// (serverPort - serverPorts[0]), runs the voting state machine under
+// cache_mutex and then wakes every thread waiting on cache_cond.
+void Cache::getHTTPResponseVote(string host, string request, string url, int serverPort,
+                                MySocket *browserSock, bool isSSL, MySocket *replySock)
+{
+    int browserId = -1;
+    pthread_mutex_lock(&cache_mutex);
+    browserId = serverPort - serverPorts[0];
+    cache_dbg("cache lock browser %d %s\n", browserId, url.c_str());
+    votingFetchInsertWriteback(url, request, browserId, browserSock, host, isSSL, replySock);
+    cache_dbg("cache UNlock browser %d %s\n", browserId, url.c_str());
+    pthread_mutex_unlock(&cache_mutex);
+    cache_dbg("cache BROADCAST browser %d %s\n", browserId, url.c_str());
+    pthread_cond_broadcast(&cache_cond);
+}
+
+// Logs the final return value of replySock->read() seen by fetch().
+static void dbg_fetch(int ret) {
+    switch(ret) {
+        case ENOT_CONNECTED:
+            // message used to say ESOCKET_CONNECTED, which is not the value tested
+            cache_dbg("ENOT_CONNECTED returned by replySock->read()\n");
+            break;
+        case ECONN_CLOSED:
+            //cache_dbg("ESOCKET_CLOSED returned by replySock->read()\n");
+            break;
+        case ESOCKET_ERROR:
+            cache_dbg("ESOCKET_ERROR returned by replySock->read()\n");
+            break;
+        default:
+            cache_dbg("%d bytes read by replySock->read()\n", ret);
+            break;
+    }
+}
+
+
+// Sends the request stored in ent over replySock and appends everything read
+// back to the entry's response. When replySock is NULL or the request cannot
+// be written, a canned 404 reply is stored instead and -1 is returned;
+// otherwise returns 0. replySock is deleted on every path.
+int Cache::fetch(CacheEntry *ent, string host, bool isSSL, int browserId, MySocket *replySock) {
+    if(replySock == NULL) {
+        cout << "returning 404" << endl;
+        ent->appendResponse(reply404);
+        return -1;
+    }
+    cache_dbg("CACHE: SENDING request %s\n", ent->getRequest().c_str());
+    if(!replySock->write_bytes(ent->getRequest())) {
+        cout << "returning 404" << endl;
+        ent->appendResponse(reply404);
+        // the success path deletes the socket, so the failure path must too
+        delete replySock;
+        return -1;
+    }
+    printf("FETCH CALLED %s, response length: %zu\n", ent->getUrl().c_str(), ent->getResponse().length());
+    unsigned char buf[1024];
+    int num_bytes;
+    cache_dbg("browser %d: BEFORE FETCHING %s\n", browserId, ent->getUrl().c_str());
+    while((num_bytes = replySock->read(buf, sizeof(buf))) > 0) {
+        cache_dbg("CACHE: fetch read %d bytes\n", num_bytes);
+        ent->appendResponse((const char *)buf, num_bytes);
+        cache_dbg("ent->response length: %zu\n", ent->getResponse().length());
+    }
+    dbg_fetch(num_bytes);
+
+    printf("FETCHED %s, response length: %zu\n", ent->getUrl().c_str(), ent->getResponse().length());
+    cache_dbg("browser %d: done FETCHING %s, response length: %zu\n", browserId, ent->getUrl().c_str(), ent->getResponse().length());
+    cache_dbg("browser %d: fetched %s\n%s\n", browserId, ent->getUrl().c_str(), ent->getResponse().c_str());
+    delete replySock;
+    return 0;
+}
+
+// Non-voting relay: writes request to replySock and copies the reply to
+// browserSock in 1024-byte chunks until the read returns <= 0 or a write to
+// the browser fails. Sends the canned 404 when the request cannot be written.
+void Cache::handleResponse(MySocket *browserSock, MySocket *replySock, string request)
+{
+    if(!replySock->write_bytes(request)) {
+        // XXX FIXME we should do something other than 404 here
+        browserSock->write_bytes(reply404);
+        return;
+    }
+    unsigned char buf[1024];
+    int num_bytes;
+    while((num_bytes = replySock->read(buf, sizeof(buf))) > 0) {
+        if(!browserSock->write_bytes(buf, num_bytes)) {
+            break;
+        }
+    }
+}
+
+// Entry point for non-voting mode: relays the request without caching and
+// deletes replySock afterwards. A NULL replySock produces the canned 404.
+void Cache::getHTTPResponseNoVote(string host, string request, string url, int serverPort,
+                                  MySocket *browserSock, bool isSSL, MySocket *replySock)
+{
+    if(replySock == NULL) {
+        cout << "returning 404" << endl;
+        browserSock->write_bytes(reply404);
+        return;
+    }
+    handleResponse(browserSock, replySock, request);
+
+    delete replySock;
+}
+
+// Sets the number of browser slots given to every CacheEntry created later.
+void Cache::setNumBrowsers(const int num)
+{
+    num_browsers = num;
+}
+
+
+
+// Returns the process-wide Cache instance.
+Cache *cache()
+{
+    return &globalCache;
+}
+
+
+Cache::Cache()
+{
+    pthread_mutex_init(&cache_mutex, NULL);
+    pthread_cond_init(&cache_cond, NULL);
+}
+// NOTE(review): entries stored in m_store are never deleted.
+Cache::~Cache()
+{
+    pthread_cond_destroy(&cache_cond);
+    pthread_mutex_destroy(&cache_mutex);
+}
+
+
+
+/*
+bool Cache::copyNetBytes(MySocket *readSock, MySocket *writeSock)
+{
+    unsigned char buf[1024];
+    int ret;
+
+    ret = readSock->read(buf, sizeof(buf));
+    if(ret <= 0)
+        return false;
+
+    return writeSock->write_bytes(buf, ret);
+}
+
+void
Cache::handleTunnel(MySocket *browserSock, MySocket *replySock)
+{
+    if(!browserSock->write_bytes(CONNECT_REPLY))
+        return;
+
+    int bFd = browserSock->getFd();
+    int rFd = replySock->getFd();
+
+    int ret;
+    fd_set readSet;
+
+    int maxFd = (bFd > rFd) ? bFd : rFd;
+
+    while(true) {
+        FD_ZERO(&readSet);
+
+        FD_SET(rFd, &readSet);
+        FD_SET(bFd, &readSet);
+
+        ret = select(maxFd+1, &readSet, NULL, NULL, NULL);
+
+        if(ret <= 0)
+            break;
+
+        if(FD_ISSET(rFd, &readSet)) {
+            if(!copyNetBytes(replySock, browserSock)) {
+                break;
+            }
+        }
+
+        if(FD_ISSET(bFd, &readSet)) {
+            if(!copyNetBytes(browserSock, replySock)) {
+                break;
+            }
+        }
+    }
+}
+*/
diff --git a/Cache.h b/Cache.h
new file mode 100644
index 0000000..edd3ce5
--- /dev/null
+++ b/Cache.h
@@ -0,0 +1,53 @@
+#ifndef _CACHE_H_
+#define _CACHE_H_
+
+// NOTE(review): the names of the system headers were lost when this patch was
+// extracted; <pthread.h>, <map> and <string> are what this header needs.
+#include <pthread.h>
+#include "MySocket.h"
+
+#include "CacheEntry.h"
+#include <map>
+#include <string>
+
+// Proxy response cache. In voting mode a url is fetched once, after a second
+// request for it arrives, and the stored response is replayed to every
+// browser; in non-voting mode requests are relayed without caching.
+class Cache {
+  public:
+    Cache();
+    virtual ~Cache();
+    void getHTTPResponseNoVote(std::string host, std::string request, std::string url,
+                               int serverPort, MySocket *browserSock, bool isSSL, MySocket *replySock);
+    void getHTTPResponseVote(std::string host, std::string request, std::string url,
+                             int serverPort, MySocket *browserSock, bool isSSL, MySocket *replySock);
+    MySocket *getReplySocket(std::string host, bool isSSL);
+    static void setNumBrowsers(const int num);
+  protected:
+    void handleResponse(MySocket *browserSock, MySocket *replySock, std::string request);
+    //void handleTunnel(MySocket *browserSock, MySocket *replySock);
+    //bool copyNetBytes(MySocket *readSock, MySocket *writeSock);
+
+
+    //Need this "string request" parameter, because we have to buffer it, and send it
+    //out later if voted
+    //Must grab cache_mutex
+    int votingFetchInsertWriteback(std::string url, std::string request, int browserId,
+                                   MySocket *browserSock, std::string host, bool isSSL, MySocket *replySock);
+    //Must grab cache_mutex
+    void addToStore(std::string url, CacheEntry *ent);
+    //Must grab cache_mutex
+    CacheEntry *find(std::string url, std::string request);
+    //Must grab cache_mutex
+    int sendBrowser(MySocket *browserSock, CacheEntry *ent, int browserId);
+
+    int fetch(CacheEntry *ent, std::string host, bool isSSL, int browserId, MySocket *replySock);
+
+
+    // url -> entry; template arguments restored from how Cache.cpp uses the map
+    std::map<std::string, CacheEntry *> m_store;
+
+    pthread_mutex_t cache_mutex;
+    pthread_cond_t cache_cond;
+
+    static int num_browsers;
+};
+
+
+
+Cache *cache();
+
+#endif
diff --git a/CacheEntry.cpp b/CacheEntry.cpp
new file mode 100644
index 0000000..43f2ae1
--- /dev/null
+++ b/CacheEntry.cpp
@@ -0,0 +1,66 @@
+#include "CacheEntry.h"
+
+// Creates an entry for _url holding the buffered _request, with one zeroed
+// request counter and one zeroed response counter per browser.
+CacheEntry::CacheEntry(int num_browsers, std::string _url, std::string _request)
+{
+    url = _url;
+    method = METHOD_UNDEFINED;
+    cache_state = CACHE_UNDEFINED;
+    m_numBrowsers = num_browsers;
+    request = _request;
+    for(int i = 0; i < num_browsers; i++) {
+        req_vec.push_back(0);
+        resp_vec.push_back(0);
+    }
+}
+
+CacheEntry::~CacheEntry()
+{
+}
+
+void CacheEntry::setReqState(REQUEST_CACHE_STATE s)
+{
+    cache_state = s;
+}
+
+REQUEST_CACHE_STATE CacheEntry::getReqState()
+{
+    return cache_state;
+}
+
+// Counts one more request from browserId and returns the new count.
+int CacheEntry::updateReqVec(int browserId)
+{
+    // a negative id would index before the start of the vector
+    assert((browserId >= 0) && (browserId < m_numBrowsers));
+    return ++req_vec[browserId];
+}
+
+// Counts one more response delivered to browserId and returns the new count.
+int CacheEntry::updateRespVec(int browserId)
+{
+    assert((browserId >= 0) && (browserId < m_numBrowsers));
+//  assert(resp_vec[browserId] == 0);
+    return ++resp_vec[browserId];
+}
+
+std::string CacheEntry::getRequest()
+{
+    return request;
+}
+
+std::string CacheEntry::getResponse()
+{
+    return response;
+}
+
+// Appends part to the stored response.
+void CacheEntry::appendResponse(std::string part)
+{
+    // append the string itself; going through c_str() stopped at the first
+    // embedded NUL byte
+    response.append(part);
+}
+
+// Appends exactly size bytes starting at part to the stored response.
+void CacheEntry::appendResponse(const char *part, int size)
+{
+    response.append(part, size);
+}
+
+std::string CacheEntry::getUrl()
+{
+    return url;
+}
diff --git a/CacheEntry.h b/CacheEntry.h
new file mode 100644
index 0000000..f3f699a
--- /dev/null
+++ b/CacheEntry.h
@@ -0,0 +1,64 @@
+//CacheEntry is ONLY used when in voting mode, non-voting mode does NOT have cache
+//because Squid can easily do that for
us(except MITM need more tricks).
+#ifndef CACHE_ENTRY_H_
+#define CACHE_ENTRY_H_
+
+// NOTE(review): the names of the system headers were lost when this patch was
+// extracted; <string> and <vector> are what this header needs. The guard
+// macro was renamed because identifiers containing "__" are reserved.
+#include <string>
+#include <vector>
+
+#include <assert.h>
+
+enum HTTP_METHODS {
+    GET = 0,
+    POST,
+    METHOD_UNDEFINED
+};
+
+// Life cycle of a cached request.
+enum REQUEST_CACHE_STATE {
+    CACHE_NEW = 0,
+    CACHE_FETCHING,
+    CACHE_IN,
+    CACHE_UNDEFINED
+};
+
+// One cached request/response pair together with per-browser counters of how
+// often the url was requested and how often the response was delivered.
+class CacheEntry {
+  public:
+    CacheEntry(int num_browsers, std::string _url, std::string _request);
+
+    virtual ~CacheEntry();
+
+    void setReqState(REQUEST_CACHE_STATE s);
+
+    REQUEST_CACHE_STATE getReqState();
+
+    // Both update functions increment the counter of browserId and return
+    // the new value.
+    int updateReqVec(int browserId);
+
+    int updateRespVec(int browserId);
+
+    // Returns a copy of the per-browser request counters.
+    std::vector<int> getReqVec() { return req_vec; }
+
+    std::string getRequest();
+
+    std::string getResponse();
+
+    void appendResponse(std::string part);
+
+    void appendResponse(const char *part, int size);
+
+    std::string getUrl();
+
+  protected:
+    int m_numBrowsers;
+    std::string url;
+    HTTP_METHODS method;
+    std::vector<int> req_vec;
+    std::vector<int> resp_vec;
+    std::string request;
+    std::string response;
+    // NOTE(review): volatile does not synchronize threads; presumably every
+    // access happens with the cache mutex held - confirm against the callers.
+    volatile REQUEST_CACHE_STATE cache_state;
+};
+
+
+
+#endif
diff --git a/HTTP.cpp b/HTTP.cpp
new file mode 100644
index 0000000..79215ec
--- /dev/null
+++ b/HTTP.cpp
@@ -0,0 +1,436 @@
+/*========================================================
+** University of Illinois/NCSA
+** Open Source License
+**
+** Copyright (C) 2011,The Board of Trustees of the University of
+** Illinois. All rights reserved.
+** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
+**==========================================================
+*/
+
+#include "HTTP.h"
+
+// NOTE(review): the names of the system headers were lost when this patch was
+// extracted; the list below is what the code in this file needs.
+// <strings.h> is new and provides strcasecmp().
+#include <assert.h>
+#include <stdio.h>
+#include <strings.h>
+
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+// HTTP header field names are case-insensitive, so never compare them with ==.
+static bool headerNameIs(const string &field, const char *name)
+{
+    return strcasecmp(field.c_str(), name) == 0;
+}
+
+// Reason phrase used when rebuilding a status line. Codes without an entry
+// get "Unknown" so that an unusual status no longer aborts the proxy.
+static const char *reasonPhrase(unsigned int status)
+{
+    switch(status) {
+        case 100: return "Continue";
+        case 200: return "OK";
+        case 201: return "Created";
+        case 202: return "Accepted";
+        case 204: return "No Content";
+        case 206: return "Partial Content";
+        case 301: return "Moved Permanently";
+        case 302: return "Moved Temporarily";
+        case 303: return "See Other";
+        case 304: return "Not Modified";
+        case 307: return "Temporary Redirect";
+        case 400: return "Bad Request";
+        case 401: return "Unauthorized";
+        case 403: return "Forbidden";
+        case 404: return "Not Found";
+        case 405: return "Method Not Allowed";
+        case 408: return "Request Timeout";
+        case 410: return "Gone";
+        case 500: return "Internal Server Error";
+        case 502: return "Bad Gateway";
+        case 503: return "Service Unavailable";
+        case 504: return "Gateway Timeout";
+        default:  return "Unknown";
+    }
+}
+
+
+/***************************** HTTP Parser callbacks ************************/
+
+int HTTP::message_begin_cb(http_parser *parser)
+{
+    HTTP *http = (HTTP *) parser->data;
+    assert(http->getState() == HTTP::INIT);
+    http->setState(HTTP::HEADER);
+    return 0;
+}
+
+int HTTP::path_cb(http_parser *parser, const char *at, size_t length)
+{
+    HTTP *http = (HTTP *) parser->data;
+    http->m_path.append(at, length);
+    return 0;
+}
+int HTTP::query_string_cb(http_parser *parser, const char *at, size_t length)
+{
+    HTTP *http = (HTTP *) parser->data;
+    http->m_query.append(at, length);
+    return 0;
+}
+
+int HTTP::url_cb(http_parser *parser, const char *at, size_t length)
+{
+    HTTP *http = (HTTP *) parser->data;
+    http->appendUrl(at, length);
+
+    return 0;
+}
+
+int HTTP::fragment_cb(http_parser */*parser*/, const char */*at*/, size_t /*length*/)
+{
+    cout << "fragment" << endl;
+    assert(false);
+    return 0;
+}
+
+// The parser may deliver one field name in several pieces: a call while in
+// FIELD state continues the current name, any other call starts a new header.
+int HTTP::header_field_cb(http_parser *parser, const char *at, size_t length)
+{
+    HTTP *http = (HTTP *) parser->data;
+
+    if(http->getState() == HTTP::FIELD) {
+        http->appendHeaderField(at, length);
+    } else if((http->getState() == HTTP::VALUE) ||
+              (http->getState() == HTTP::HEADER)) {
+        http->newHeaderField(at, length);
+        http->setState(HTTP::FIELD);
+    } else {
+        assert(false);
+    }
+
+    return 0;
+}
+
+int HTTP::header_value_cb(http_parser *parser, const char *at, size_t length)
+{
+    HTTP *http = (HTTP *) parser->data;
+    if(http->getState() == HTTP::FIELD) {
+        http->setState(HTTP::VALUE);
+    }
+    assert(http->getState() == HTTP::VALUE);
+    http->appendHeaderValue(at, length);
+    return 0;
+}
+
+// Stores the last header. For responses it also rebuilds the status line and
+// returns -1 so that the parser stops after the headers; m_extraParsedBytes
+// then makes addData() report one more byte than http_parser_execute() did.
+int HTTP::headers_complete_cb(http_parser *parser)
+{
+    HTTP *http = (HTTP *) parser->data;
+    http->addHeaderField();
+    http->m_headerDone = true;
+
+    if(http->m_httpType == HTTP_RESPONSE) {
+        char buf[64];
+        snprintf(buf, sizeof(buf), "HTTP/%u.%u %u ",
+                 (unsigned int) parser->http_major,
+                 (unsigned int) parser->http_minor,
+                 (unsigned int) parser->status_code);
+        http->m_statusStr = buf;
+        http->m_statusStr += reasonPhrase(parser->status_code);
+
+        http->m_extraParsedBytes = 1;
+        return -1;
+    }
+
+    return 0;
+}
+
+int HTTP::body_cb(http_parser *parser, const char *at, size_t length)
+{
+    HTTP *http = (HTTP *) parser->data;
+    http->m_body.append(at, length);
+
+    return 0;
+}
+
+int HTTP::message_complete_cb(http_parser *parser)
+{
+    HTTP *http = (HTTP *) parser->data;
+    assert((http->getState() == HTTP::VALUE) ||
+           (http->getState() == HTTP::BODY));
+    http->setState(HTTP::DONE);
+    http->messageComplete(parser->method);
+    return 0;
+}
+
+/****************************************************************************/
+
+
+
+/*************************** Public Functions *******************************/
+
+
+HTTP::HTTP(http_parser_type httpType)
+{
+    m_state = INIT;
+    http_parser_init(&m_parser, httpType);
+    m_doneParsing = false;
+    m_httpType = httpType;
+    m_headerDone = false;
+    // isConnect() and getHost() read m_method, which used to stay
+    // uninitialized until a request had been parsed completely
+    m_method = HTTP_GET;
+
+    m_settings.on_message_begin = message_begin_cb;
+    m_settings.on_path = path_cb;
+    m_settings.on_query_string = query_string_cb;
+    m_settings.on_url = url_cb;
+    m_settings.on_fragment = fragment_cb;
+    m_settings.on_header_field = header_field_cb;
+    m_settings.on_header_value = header_value_cb;
+    m_settings.on_headers_complete = headers_complete_cb;
+    m_settings.on_body = body_cb;
+    m_settings.on_message_complete = message_complete_cb;
+
+    m_parser.data = this;
+
+    m_field = NULL;
+    m_value = NULL;
+    m_extraParsedBytes = 0;
+}
+
+HTTP::~HTTP()
+{
+    // deleting NULL is a no-op, so no checks are needed
+    delete m_field;
+    delete m_value;
+
+    for(unsigned int idx = 0; idx < m_headers.size(); idx++) {
+        delete m_headers[idx].first;
+        delete m_headers[idx].second;
+    }
+}
+
+// Feeds len bytes to the parser and returns the number of bytes consumed.
+// Must not be called once the message is complete.
+int HTTP::addData(const unsigned char *data, int len)
+{
+    if(m_doneParsing) {
+        assert(false);
+    }
+    int ret = http_parser_execute(&m_parser, &m_settings, (const char *) data, len);
+    ret += m_extraParsedBytes;
+    m_extraParsedBytes = 0;
+    return ret;
+}
+
+string HTTP::getBody()
+{
+    return m_body;
+}
+
+string HTTP::getUrl()
+{
+    return m_url;
+}
+
+// Returns "host:port": the request url for CONNECT, otherwise the value of
+// the Host header. ":80" is appended when no port is present.
+string HTTP::getHost()
+{
+    string host = (m_method == HTTP_CONNECT) ? m_url : m_host;
+    if(host.find(':') == string::npos) {
+        host += ":80";
+    }
+    return host;
+}
+
+bool HTTP::isHeaderDone()
+{
+    return m_headerDone;
+}
+
+bool HTTP::isDone()
+{
+    return m_doneParsing;
+}
+
+// Rebuilds the response header block: status line, every parsed header with
+// Connection forced to "close" (added when absent), then an empty line.
+string HTTP::getReplyHeader()
+{
+    string reply;
+
+    assert(m_httpType == HTTP_RESPONSE);
+    assert(m_statusStr.size() > 0);
+
+    reply = m_statusStr + "\r\n";
+
+    bool foundConn = false;
+    for(unsigned int idx = 0; idx < m_headers.size(); idx++) {
+        string field = *(m_headers[idx].first);
+        string value = *(m_headers[idx].second);
+
+        if(headerNameIs(field, "Connection")) {
+            value = "close";
+            foundConn = true;
+        }
+
+        reply += field + string(": ") + value + string("\r\n");
+    }
+
+    if(!foundConn) {
+        reply += "Connection: close\r\n";
+    }
+
+    reply += "\r\n";
+
+    return reply;
+}
+
+// Rebuilds the request as it is sent upstream: origin-form request line,
+// headers with Proxy-Connection replaced by "Connection: close", Keep-Alive
+// dropped and User-Agent optionally overridden, then the body.
+string HTTP::getProxyRequest(const char *userAgent)
+{
+    string reply;
+    string urlPathQuery;
+
+    assert(m_httpType == HTTP_REQUEST);
+
+    if((m_method == HTTP_GET) || (m_method == HTTP_POST) || (m_method == HTTP_HEAD)) {
+        if(m_path.size() == 0) {
+            urlPathQuery = "/";
+        } else {
+            urlPathQuery = m_path;
+        }
+        if(m_query.size() > 0) {
+            urlPathQuery += "?" + m_query;
+        }
+        if(m_url.find(urlPathQuery) == string::npos) {
+            // this is a hack to get around buggy HTML from taobao
+            assert(m_query.size() > 0);
+            urlPathQuery = m_path + "??" + m_query;
+            if(m_url.find(urlPathQuery) == string::npos) {
+                cout << "url path mismatch " << m_url << endl << urlPathQuery << endl;
+            }
+        }
+    }
+
+    if(m_method == HTTP_GET) {
+        reply = "GET " + urlPathQuery + " HTTP/1.1\r\n";
+    } else if(m_method == HTTP_CONNECT) {
+        reply = "CONNECT " + m_url + " HTTP/1.1\r\n";
+    } else if(m_method == HTTP_POST) {
+        reply = "POST " + urlPathQuery + " HTTP/1.1\r\n";
+    } else if(m_method == HTTP_HEAD) {
+        reply = "HEAD " + urlPathQuery + " HTTP/1.1\r\n";
+    } else {
+        assert(false);
+    }
+
+    for(unsigned int idx = 0; idx < m_headers.size(); idx++) {
+        string field = *(m_headers[idx].first);
+        string value = *(m_headers[idx].second);
+
+        if((userAgent != NULL) && headerNameIs(field, "User-Agent")) {
+            value = string(userAgent);
+        }
+
+        if(headerNameIs(field, "Proxy-Connection")) {
+            field = string("Connection");
+            value = string("close");
+            //value = string("keep-alive");
+        }
+
+        if(!headerNameIs(field, "Keep-Alive")) {
+            reply += field + string(": ") + value + string("\r\n");
+        }
+    }
+
+    reply += string("\r\n");
+    if(m_body.size() > 0) {
+        reply += m_body;
+    }
+
+    if(m_method == HTTP_HEAD) {
+        cout << reply;
+    }
+
+    return reply;
+}
+
+
+/****************************************************************************/
+
+
+
+/************************** Private Functions *******************************/
+
+HTTP::HttpState HTTP::getState()
+{
+    return m_state;
+}
+
+void HTTP::setState(HttpState newState)
+{
+    m_state = newState;
+}
+
+void HTTP::appendUrl(const char *at, size_t len)
+{
+    m_url.append(at, len);
+}
+
+// Moves the header being assembled (m_field/m_value) into m_headers, which
+// takes over ownership of both strings. Remembers the Host value in m_host.
+void HTTP::addHeaderField()
+{
+    if(m_field != NULL) {
+        assert(m_value != NULL);
+        if(headerNameIs(*m_field, "Host")) {
+            m_host = *m_value;
+        }
+        if(headerNameIs(*m_field, "Eoh")) {
+            cout << "got the Eoh header" << endl;
+        }
+        m_headers.push_back(make_pair(m_field, m_value));
+        m_field = NULL;
+        m_value = NULL;
+    } else {
+        assert(m_value == NULL);
+    }
+}
+
+// Stores the previous header, if any, and starts a new one named at[0..len).
+void HTTP::newHeaderField(const char *at, size_t len)
+{
+    addHeaderField();
+    m_field = new string(at, len);
+    m_value = new string();
+}
+void HTTP::appendHeaderField(const char *at, size_t len)
+{
+    assert(m_field != NULL);
+    m_field->append(at, len);
+}
+
+void HTTP::appendHeaderValue(const char *at, size_t len)
+{
+    assert(m_value != NULL);
+    m_value->append(at, len);
+}
+
+// Records the request method (only GET, CONNECT, POST and HEAD are supported)
+// and marks the message as completely parsed.
+void HTTP::messageComplete(unsigned char method)
+{
+    if(m_httpType == HTTP_REQUEST) {
+        assert((method == HTTP_GET) || (method == HTTP_CONNECT) || (method == HTTP_POST) || (method == HTTP_HEAD));
+        m_method = method;
+    }
+    m_doneParsing = true;
+}
+
+/****************************************************************************/
diff --git a/HTTP.h b/HTTP.h
new file mode 100644
index 0000000..aa7a35a
--- /dev/null
+++ b/HTTP.h
@@ -0,0 +1,68 @@
+#ifndef _HTTP_H_
+#define _HTTP_H_
+
+#include "http_parser.h"
+
+// NOTE(review): the names of the system headers were lost when this patch was
+// extracted; <string>, <utility> and <vector> are what this header needs.
+#include <string>
+#include <utility>
+#include <vector>
+
+// Incremental parser for one HTTP request or response built on http_parser.
+// Collects url, headers and body and can re-serialize the message for the
+// proxy (getProxyRequest / getReplyHeader).
+class HTTP {
+  public:
+    typedef enum {INIT, HEADER, FIELD, VALUE, BODY, DONE} HttpState;
+
+    HTTP(http_parser_type httpType = HTTP_REQUEST);
+    ~HTTP();
+
+    int addData(const unsigned char *data, int len);
+    bool isDone();
+    bool isHeaderDone();
+    std::string getProxyRequest(const char *userAgent = NULL);
+    std::string getReplyHeader();
+    std::string getHost();
+    std::string getUrl();
+    bool isConnect() {return m_method == HTTP_CONNECT;}
+    std::string getBody();
+
+  private:
+    // The object owns heap-allocated header strings and the parser keeps a
+    // pointer back to it, so copying is disabled (declared, never defined).
+    HTTP(const HTTP &);
+    HTTP &operator=(const HTTP &);
+
+    static int message_begin_cb(http_parser *parser);
+    static int path_cb(http_parser *parser, const char *at, size_t length);
+    static int query_string_cb(http_parser *parser, const char *at, size_t length);
+    static int url_cb(http_parser *parser, const char *at, size_t length);
+    static int fragment_cb(http_parser *parser, const char *at, size_t length);
+    static int header_field_cb(http_parser *parser, const char *at, size_t length);
+    static int header_value_cb(http_parser *parser, const char *at, size_t length);
+    static int headers_complete_cb(http_parser *parser);
+    static int body_cb(http_parser *parser, const char *at, size_t length);
+    static int message_complete_cb(http_parser *parser);
+
+    HttpState getState();
+    void setState(HttpState newState);
+    void appendUrl(const char *at, size_t len);
+    void newHeaderField(const char *at, size_t len);
+    void appendHeaderField(const char *at, size_t len);
+    void appendHeaderValue(const char *at, size_t len);
+    void addHeaderField();
+    void messageComplete(unsigned char method);
+
+    http_parser_settings m_settings;
+    http_parser m_parser;
+    HttpState m_state;
+    bool m_doneParsing;
+    bool m_headerDone;
+
+    std::string m_url;
+    std::string m_path;
+    std::string m_query;
+    std::string m_host;
+    std::string *m_field;
+    std::string *m_value;
+    std::vector< std::pair< std::string *, std::string *> > m_headers;
+    std::string m_body;
+    std::string m_statusStr;
+    unsigned char m_method;
+    http_parser_type m_httpType;
+    int m_extraParsedBytes;
+};
+
+#endif
diff --git a/HTTPRequest.cpp b/HTTPRequest.cpp
new file mode 100644
index 0000000..b1a9013
--- /dev/null
+++ b/HTTPRequest.cpp
@@ -0,0 +1,147 @@
+/*========================================================
+** University of Illinois/NCSA
+** Open Source License
+**
+** Copyright (C) 2011,The Board of Trustees of the University of
+** Illinois. All rights reserved.
+** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
+**==========================================================
+*/
+
+
+#include "HTTPRequest.h"
+
+// NOTE(review): the names of the system headers were lost when this patch was
+// extracted; the list below is what the code in this file needs.
+#include <assert.h>
+
+#include <iostream>
+#include <string>
+
+#include "dbg.h"
+
+using namespace std;
+
+#define CONNECT_REPLY "HTTP/1.1 200 Connection Established\r\n\r\n"
+
+// Wraps the browser-side socket. sock is not owned; the HTTP parser created
+// here is owned and released by the destructor.
+HTTPRequest::HTTPRequest(MySocket *sock, int serverPort)
+{
+    m_sock = sock;
+    m_http = new HTTP();
+    m_serverPort = serverPort;
+    m_totalBytesRead = 0;
+    m_totalBytesWritten = 0;
+}
+
+HTTPRequest::~HTTPRequest()
+{
+    delete m_http;
+}
+
+void HTTPRequest::printDebugInfo()
+{
+    cerr << "    isDone = " << m_http->isDone() << endl;
+    cerr << "    bytesRead = " << m_totalBytesRead << endl;
+    cerr << "    bytesWritte = " << m_totalBytesWritten << endl;
+    cerr << "    url = " << m_http->getUrl() << endl;
+}
+
+// Reads from the socket until the parser reports a complete request.
+// Returns false when a read returns <= 0 before that point.
+bool HTTPRequest::readRequest()
+{
+    assert(!m_http->isDone());
+    unsigned char buf[1024];
+
+    int num_bytes;
+    while(!m_http->isDone()) {
+        num_bytes = m_sock->read(buf, sizeof(buf));
+        if(num_bytes > 0) {
+            onRead(buf, (unsigned int) num_bytes);
+        } else {
+            cerr << "socket error" << endl;
+            return false;
+        }
+    }
+//  httpreq_dbg("req: %s\n", getUrl().c_str());
+//  httpreq_dbg("req: %s\n", m_http->getProxyRequest().c_str());
+
+    return true;
+}
+
+
+// Feeds len freshly read bytes to the parser, looping until all of them have
+// been consumed.
+void HTTPRequest::onRead(const unsigned char *buffer, unsigned int len)
+{
+    m_totalBytesRead += len;
+
+    unsigned int bytesRead = 0;
+    assert(len > 0);
+
+    while(bytesRead < len) {
+        assert(!m_http->isDone());
+        int ret = m_http->addData(buffer + bytesRead, len - bytesRead);
+        assert(ret > 0);
+        if(ret <= 0) {
+            // with asserts compiled out the loop would otherwise never
+            // advance (or bytesRead would wrap around)
+            break;
+        }
+        bytesRead += ret;
+
+        // This is a workaround for a parsing bug that sometimes
+        // crops up with connect commands. The parser will think
+        // it is done before it reads the last newline of some
+        // properly formatted connect requests
+        if(m_http->isDone() && (bytesRead < len)) {
+            if(m_http->isConnect() && ((len-bytesRead) == 1) && (buffer[bytesRead] == '\n')) {
+                break;
+            } else {
+                assert(false);
+            }
+        }
+    }
+}
+
+string HTTPRequest::getHost()
+{
+    return m_http->getHost();
+}
+string HTTPRequest::getRequest()
+{
+    return m_http->getProxyRequest();
+}
+string HTTPRequest::getUrl()
+{
+    return m_http->getUrl();
+}
+
+bool HTTPRequest::isConnect()
+{
+    return m_http->isConnect();
+}
diff --git a/HTTPRequest.h b/HTTPRequest.h
new file mode 100644
index 0000000..182cda9
--- /dev/null
+++ b/HTTPRequest.h
@@ -0,0 +1,34 @@
+#ifndef HTTP_REQUEST_H_
+#define HTTP_REQUEST_H_
+
+#include "MySocket.h"
+#include "http_parser.h"
+#include "HTTP.h"
+
+// NOTE(review): the header name was lost when this patch was extracted;
+// <string> is what this header needs.
+#include <string>
+
+// Reads and parses one HTTP request from a browser connection.
+class HTTPRequest {
+  public:
+    HTTPRequest(MySocket *sock, int serverPort);
+    ~HTTPRequest();
+
+    bool readRequest();
+
+    std::string getHost();
+    std::string getRequest();
+    std::string getUrl();
+    bool isConnect();
+
+    void printDebugInfo();
+
+  protected:
+    void onRead(const unsigned char *buffer, unsigned int len);
+
+    MySocket *m_sock;
+    HTTP *m_http;
+    int m_serverPort;
+    unsigned long m_totalBytesRead;
+    unsigned long m_totalBytesWritten;
+
+  private:
+    // m_http is deleted in the destructor, so a copy would delete it twice;
+    // copying is disabled (declared, never defined).
+    HTTPRequest(const HTTPRequest &);
+    HTTPRequest &operator=(const HTTPRequest &);
+};
+
+#endif
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ebf59c8
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,35 @@
+EXE = twproxy
+all: $(EXE)
+
+CC = g++
+CFLAGS = -g -W -Wall -Wno-unused
+LDFLAGS = -lpthread -lssl
+
+OBJS = main.o MyServerSocket.o MySocket.o HTTPRequest.o http_parser.o HTTP.o Cache.o CacheEntry.o
+
+-include $(OBJS:.o=.d)
+
+# libraries go after the objects that use them, otherwise linkers that
+# resolve symbols left to right drop them
+twproxy: $(OBJS)
+	$(CC) -o $@ $(CFLAGS) $(OBJS) $(LDFLAGS)
+
+serv: serv.o
+	$(CC) -o $@ $(CFLAGS) serv.o $(LDFLAGS)
+
+%.d: %.c
+	@set -e; $(CC) -MM $(CFLAGS) $< \
+	| sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' > $@;
+	@[ -s $@ ] || rm -f $@
+
+%.d: %.cpp
+	@set -e; $(CC) -MM $(CFLAGS) $< \
+	| sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' >
$@; + @[ -s $@ ] || rm -f $@ + +%.o: %.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ + +clean: + rm -f $(EXE) *.o *~ core.* *.d diff --git a/MyServerSocket.cpp b/MyServerSocket.cpp new file mode 100644 index 0000000..58dcc24 --- /dev/null +++ b/MyServerSocket.cpp @@ -0,0 +1,94 @@ +/*======================================================== +** University of Illinois/NCSA +** Open Source License +** +** Copyright (C) 2011,The Board of Trustees of the University of +** Illinois. All rights reserved. +** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. 
+** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. +**========================================================== +*/ + +#include "MyServerSocket.h" +#include +#include +#include +#include +#include +#include +#include +#include + +MyServerSocket::MyServerSocket(int port) +{ + struct sockaddr_in server; + int one = 1; + + // set up the server socket + serverFd = socket(AF_INET,SOCK_STREAM,0); + + server.sin_family = AF_INET; + server.sin_addr.s_addr = INADDR_ANY; + server.sin_port = htons((short) port); + + if (setsockopt(serverFd,SOL_SOCKET,SO_REUSEADDR,&one,sizeof(int)) == -1) { + throw MySocketException("error with set socket opts"); + } + + if( bind(serverFd,(struct sockaddr *) &server, sizeof(server)) ==-1){ + char str[1024]; + sprintf(str,"could not bind to port %d",port); + throw MySocketException(str); + } + + //set up a listen queue + listen(serverFd, 10); +} + +MySocket *MyServerSocket::accept() +{ + //check that the sockFd is valid + + struct sockaddr_in client; + socklen_t len = sizeof(client); + int clientFd = ::accept(serverFd, (struct sockaddr *) &client, &len); + + if(clientFd<0) { + throw MySocketException("Accept function call failed"); + } + + return new MySocket(clientFd); +} diff --git a/MyServerSocket.h b/MyServerSocket.h new file mode 100644 index 0000000..df7c81b --- /dev/null +++ b/MyServerSocket.h @@ -0,0 +1,31 @@ +#ifndef MYSERVERSOCKET_H +#define MYSERVERSOCKET_H + +#include "MySocket.h" +#include "MySocketException.h" + +class MyServerSocket { + public: + /** + * creates a new server socket and 
binds it to the port specified. + * if it cannot bind, it will throw a socket exception. + * + * @param port the port to bind to + */ + MyServerSocket(int port); + MyServerSocket() { serverFd = -1; } + + /** + * this function will accept incoming requests to connect and + * return the resulting socket + */ + MySocket *accept(); + + int getFd() { return serverFd; } + protected: + int serverFd; + +}; + + +#endif diff --git a/MySocket.cpp b/MySocket.cpp new file mode 100644 index 0000000..bed99f7 --- /dev/null +++ b/MySocket.cpp @@ -0,0 +1,450 @@ +/*======================================================== +** University of Illinois/NCSA +** Open Source License +** +** Copyright (C) 2011,The Board of Trustees of the University of +** Illinois. All rights reserved. +** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. 
+** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. +**========================================================== +*/ + +#include "MySocket.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include "dbg.h" + +using namespace std; + +#define CHK_SSL(err) if ((err)==-1) { ERR_print_errors_fp(stderr); exit(2); } +#define CHK_NULL(x) if ((x)==NULL) exit (1) + +#define HOME "./" +#define CERTF HOME "cacert.pem" +#define KEYF HOME "privkey.pem" + + +MySocket::MySocket(const char *inetAddr, int port) +{ + struct sockaddr_in server; + struct addrinfo hints; + struct addrinfo *res; + + isSSL = false; + ctx = NULL; + ssl = NULL; + + // set up the new socket (TCP/IP) + sockFd = socket(AF_INET,SOCK_STREAM,0); + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + int ret = getaddrinfo(inetAddr, NULL, &hints, &res); + if(ret != 0) { + string str; + str = string("Could not get host ") + string(inetAddr); + throw MySocketException(str.c_str()); + } + + server.sin_addr = ((struct sockaddr_in *) (res->ai_addr))->sin_addr; + server.sin_port = htons((short) port); + server.sin_family = AF_INET; + freeaddrinfo(res); + + // conenct to the server + if( connect(sockFd, (struct sockaddr *) &server, + 
sizeof(server)) == -1 ) { + throw MySocketException("Did not connect to the server"); + } + +} + +MySocket::MySocket(int socketFileDesc) +{ + sockFd = socketFileDesc; + isSSL = false; + ctx = NULL; + ssl = NULL; +} + +MySocket::~MySocket(void) +{ + close(); +} + + +EVP_PKEY * MySocket::readPublicKey(const char *certfile) +{ + FILE *fp = fopen (certfile, "r"); + X509 *x509; + EVP_PKEY *pkey; + if (!fp) + return NULL; + x509 = PEM_read_X509(fp, NULL, 0, NULL); + if (x509 == NULL) { + ERR_print_errors_fp (stderr); + return NULL; + } + fclose (fp); + pkey=X509_extract_key(x509); + X509_free(x509); + if (pkey == NULL) + ERR_print_errors_fp (stderr); + return pkey; +} + +EVP_PKEY *MySocket::readPrivateKey(const char *keyfile) +{ + FILE *fp = fopen(keyfile, "r"); + EVP_PKEY *pkey; + if (!fp) + return NULL; + pkey = PEM_read_PrivateKey(fp, NULL, 0, NULL); + fclose (fp); + if (pkey == NULL) + ERR_print_errors_fp (stderr); + return pkey; +} + +X509 *MySocket::readX509(const char *certfile) +{ + FILE *fp = fopen(certfile, "r"); + if(!fp) + return NULL; + //X509 *PEM_read_X509(FILE *fp, X509 **x, pem_password_cb *cb, void *u); + X509 *cert = PEM_read_X509(fp, NULL, NULL, NULL); CHK_NULL(cert); + fclose(fp); + return cert; +} + +X509 *MySocket::makeAndInitCert() +{ + X509 *new_cert = X509_new(); + X509_set_version(new_cert, 2); + long serialNumber = rand() % RAND_MAX; + ASN1_INTEGER_set(X509_get_serialNumber(new_cert), serialNumber); + X509_gmtime_adj(X509_get_notBefore(new_cert), 0); + X509_gmtime_adj(X509_get_notAfter(new_cert), (long)60*60*24*365); + return new_cert; +} + +void MySocket::initNewName(X509_NAME *new_name, X509_NAME *server_cert_subj_name) +{ + //now setup "CN" + int serv_cert_subjname_ent_num = X509_NAME_entry_count(server_cert_subj_name); + mitm_dbg("subject name entry number: %d\n", serv_cert_subjname_ent_num); + unsigned char *ent_data_str = NULL; + char *ent_obj_str = NULL; + X509_NAME_ENTRY *e = NULL; + ASN1_STRING *asn1_string = NULL; + ASN1_OBJECT 
*asn1_obj = NULL; + int n = -1; + for(int i; i < serv_cert_subjname_ent_num; i++) { + e = X509_NAME_get_entry(server_cert_subj_name, i); + asn1_string = X509_NAME_ENTRY_get_data(e); + asn1_obj = X509_NAME_ENTRY_get_object(e); + ASN1_STRING_to_UTF8(&ent_data_str, asn1_string); + n = OBJ_obj2nid(asn1_obj); + ent_obj_str = (char *)OBJ_nid2ln(n); + + mitm_dbg("name entry %d: %s %s\n", i, ent_obj_str, ent_data_str); + //XXX: ent_data_str and ent_obj_str are pointers to inside fields, should not free them + if(strncmp(ent_obj_str, "commonName", 10) == 0) { + mitm_dbg("setting CN to %s\n", ent_data_str); + if(!X509_NAME_add_entry_by_txt(new_name, "CN", MBSTRING_ASC, ent_data_str, -1, -1, 0)) { + ERR_print_errors_fp(stderr); + exit(7); + } + } + } + if(!X509_NAME_add_entry_by_txt(new_name, "C", MBSTRING_ASC, (const unsigned char *)"US", -1, -1, 0)) { + ERR_print_errors_fp(stderr); + exit(8); + } +} + +//commented some debug codes, keep them for now +X509 *MySocket::generateFakeCert(MySocket *clientSock) +{ + //do NOT free server_cert or these names, because enableSSLClient() is going to free it + + //get the certificate from the proxy <--> remotesite connection + X509 *server_cert = SSL_get_peer_certificate (clientSock->ssl); CHK_NULL(server_cert); + mitm_dbg("Original server certificate:\n"); + //also, do NOT free these two names + X509_NAME *server_cert_subj_name = X509_get_subject_name(server_cert); CHK_NULL(server_cert_subj_name); + X509_NAME *server_cert_issuer_name = X509_get_issuer_name(server_cert); CHK_NULL(server_cert_issuer_name); +/* + char * server_cert_subj_name_str = X509_NAME_oneline(server_cert_subj_name, 0, 0); + mitm_dbg("subject name: %s\n", server_cert_subj_name_str); + char *server_cert_issuer_name_str = X509_NAME_oneline(server_cert_issuer_name, 0, 0); + mitm_dbg("issuer name: %s\n", server_cert_issuer_name_str); + mitm_dbg("subject name entry number: %d\n", serv_cert_subjname_ent_num); + mitm_dbg("issuer name entry number: %d\n", 
X509_NAME_entry_count(server_cert_issuer_name)); +*/ + X509 *new_cert = makeAndInitCert(); + X509_NAME *new_name = X509_get_subject_name(new_cert); + initNewName(new_name, server_cert_subj_name); +/* + char *newname_subjname = X509_NAME_oneline(new_name, 0, 0); + mitm_dbg("new_name's subject name: %s\n", newname_subjname); +*/ + X509 *myCA = readX509(CERTF); CHK_NULL(myCA); + mitm_dbg("FAKE CA cert:\n"); + X509_NAME *caName = X509_get_subject_name(myCA); CHK_NULL(caName); +/* + char *caName_str = X509_NAME_oneline(caName, 0, 0); + mitm_dbg("FAKE CA name: %s\n", caName_str); + + X509_NAME *caIssuerName = X509_get_issuer_name(myCA); CHK_NULL(caIssuerName); + char *caIssuerName_str = X509_NAME_oneline(caIssuerName, 0, 0); + mitm_dbg("FAKE CA issuer name: %s\n", caIssuerName_str); +*/ + //set issuer name to myCA's subject name + X509_set_issuer_name(new_cert, caName); + //do NOT free these two keys, they are used by new_cert + EVP_PKEY *privKey = readPrivateKey(KEYF); CHK_NULL(privKey); + EVP_PKEY *pubKey = readPublicKey(CERTF); CHK_NULL(pubKey); + X509_set_pubkey(new_cert, pubKey); + //sign it + if(!X509_sign(new_cert, privKey, EVP_md5())) + CHK_NULL(NULL); + + return new_cert; +} + +void MySocket::close(void) +{ + if(sockFd<0) return; + + ::close(sockFd); + + sockFd = -1; + + isSSL = false; + + if(ssl != NULL) + SSL_free(ssl); + + if(ctx != NULL) + SSL_CTX_free(ctx); + + ssl = NULL; + ctx = NULL; +} + +void MySocket::enableSSLServer(MySocket *clientSock) +{ + if(sockFd < 0) return; + + ctx = SSL_CTX_new (SSLv23_server_method()); + if (!ctx) { + ERR_print_errors_fp(stderr); + exit(2); + } + + if (SSL_CTX_use_certificate_chain_file(ctx, CERTF) <= 0) { + ERR_print_errors_fp(stderr); + exit(3); + } + if (SSL_CTX_use_PrivateKey_file(ctx, KEYF, SSL_FILETYPE_PEM) <= 0) { + ERR_print_errors_fp(stderr); + exit(4); + } + + if (!SSL_CTX_check_private_key(ctx)) { + fprintf(stderr,"Private key does not match the certificate public key\n"); + exit(5); + } + + X509 *new_cert = 
generateFakeCert(clientSock); CHK_NULL(new_cert); + if(SSL_CTX_use_certificate(ctx, new_cert) != 1) { + ERR_print_errors_fp(stderr); + exit(6); + } + + ssl = SSL_new (ctx); CHK_NULL(ssl); + SSL_set_fd (ssl, sockFd); + int err = SSL_accept (ssl); CHK_SSL(err); + mitm_dbg("SSL connection using %s\n", SSL_get_cipher (ssl)); + isSSL = true; +} + +void MySocket::enableSSLClient(void) +{ + if(sockFd < 0) return; + + ctx = SSL_CTX_new (SSLv23_client_method()); + if (!ctx) { + ERR_print_errors_fp(stderr); + exit(2); + } + + ssl = SSL_new (ctx); CHK_NULL(ssl); + SSL_set_fd (ssl, sockFd); + int err = SSL_connect (ssl); CHK_SSL(err); + + mitm_dbg("SSL connection using %s\n", SSL_get_cipher (ssl)); + + X509 *server_cert = SSL_get_peer_certificate (ssl); CHK_NULL(server_cert); + mitm_dbg("Server certificate:\n"); + + char *str = X509_NAME_oneline (X509_get_subject_name (server_cert),0,0); + CHK_NULL(str); + mitm_dbg("\t subject: %s\n", str); + OPENSSL_free (str); + + str = X509_NAME_oneline (X509_get_issuer_name (server_cert),0,0); + CHK_NULL(str); + mitm_dbg("\t issuer: %s\n", str); + OPENSSL_free (str); + X509_free (server_cert); + isSSL = true; +} + +int MySocket::write(const void *buffer, int len) +{ + if(sockFd<0) return ENOT_CONNECTED; + + int ret; + + if(isSSL) { + ret = SSL_write(ssl, buffer, len); + } else { + ret = ::write(sockFd, buffer, len); + } + + if(ret != len) return ESOCKET_ERROR; + + return ret; +} + +bool MySocket::write_bytes(string buffer) +{ + return write_bytes(buffer.c_str(), buffer.size()); +} + +bool MySocket::write_bytes(const void *buffer, int len) +{ + const unsigned char *buf = (const unsigned char *) buffer; + int bytesWritten = 0; + + while(len > 0) { + bytesWritten = this->write(buf, len); + if(bytesWritten <= 0) { + return false; + } + buf += bytesWritten; + len -= bytesWritten; + } + + return true; + +} + +int MySocket::read(void *buffer, int len) +{ + if(sockFd<0) return ENOT_CONNECTED; + + int ret; + + if(isSSL) { + ret = SSL_read(ssl, 
buffer, len); + } else { + ret = ::read(sockFd, buffer, len); + } + + if(ret == 0) return ECONN_CLOSED; + if(ret < 0) return ESOCKET_ERROR; + + return ret; +} + + +/* +//we should not use default ctor anymore +MySocket::MySocket(void) +{ + sockFd = -1; + isSSL = false; + ctx = NULL; + ssl = NULL; +} +*/ +void MySocket::__enableSSLServer(void) +{ + if(sockFd < 0) return; + + ctx = SSL_CTX_new (SSLv23_server_method()); + if (!ctx) { + ERR_print_errors_fp(stderr); + exit(2); + } + + if (SSL_CTX_use_certificate_chain_file(ctx, CERTF) <= 0) { + ERR_print_errors_fp(stderr); + exit(3); + } + if (SSL_CTX_use_PrivateKey_file(ctx, KEYF, SSL_FILETYPE_PEM) <= 0) { + ERR_print_errors_fp(stderr); + exit(4); + } + + if (!SSL_CTX_check_private_key(ctx)) { + fprintf(stderr,"Private key does not match the certificate public key\n"); + exit(5); + } + + ssl = SSL_new (ctx); CHK_NULL(ssl); + SSL_set_fd (ssl, sockFd); + int err = SSL_accept (ssl); CHK_SSL(err); + + printf ("SSL connection using %s\n", SSL_get_cipher (ssl)); + isSSL = true; +} diff --git a/MySocket.h b/MySocket.h new file mode 100644 index 0000000..6b59383 --- /dev/null +++ b/MySocket.h @@ -0,0 +1,96 @@ +#ifndef MYSOCKET_H +#define MYSOCKET_H + +#define ENOT_CONNECTED -1 +#define EBROKEN_PIPE -2 +#define ECONN_CLOSED -3 +#define ESOCKET_ERROR -4 + +#include "MySocketException.h" + +#include +#include + +class MySocket { + public: + /* + * this is the constructor. It accepts a string representation of + * and ip address ("192.168.0.1") or domain name ("www.cs.uiuc.edu") + * and connects. Will throw an HostNotFound exception if the attepted + * connection fails. MySocket uses the TCP protocol. 
+ * + * @param inetAddr either ip address, or the domain name + * @param port the port to connect to + */ + MySocket(const char *inetAddr, int port); + + /* + * this constructor will generally not be used except for by ServerSockets + */ + MySocket(int socketFileDesc); + + /* + * default constructor, makes sure the state is properly specified + */ + //hx: I don't think we should give default ctor anymore +// MySocket(void); + ~MySocket(void); + + /* + * reads the open socket. See the read system call + * + * @param buffer buffer of length len, where the data will be stored + * @param len the length of the buffer + * + * @return if there is no error, the number of bytes read in. + * ECONN_CLOSED - connection was closed + * EBROKEN_PIPE - broken pipe + * ENOT_CONNECTED - a connection was never established + */ + int read(void *buffer, int len); + + /* + * writes to the open socket, see the write system call. + * + * @param buffer the buffer where the data is stored + * @param len the length of the buffer + * + * @return if there is no error, the number of bytes wrote. + * ECONN_CLOSED - connection was closed + * EBROKEN_PIPE - broken pipe + * ENOT_CONNECTED - a connection was never established + */ + int write(const void *buffer, int len); + + bool write_bytes(std::string buffer); + bool write_bytes(const void *buffer, int len); + void __enableSSLServer(void); + void enableSSLServer(MySocket *); + void enableSSLClient(void); + + /* + * a helper function so select can be used + */ + int getFd(void) { return sockFd; } + + void close(void); + + protected: + //this is the function which generate a fake certificate, based on + //the proxy <--> remotesite connection. 
// ===== MySocketException.h =====

#ifndef MYSOCKETEXCEPTION_H
#define MYSOCKETEXCEPTION_H

#define MSG_SIZE 100

// NOTE(review): header name lost in extraction; <string.h> is inferred from
// the strncpy call below.
#include <string.h>

/**
 * Exception thrown by the socket classes. Stores a copy of the message in a
 * fixed buffer, truncated to MSG_SIZE-1 characters.
 */
class MySocketException {
 public:
    /**
     * @param message text to keep; NULL is treated as an empty message
     */
    MySocketException(const char *message) {
        if (message == NULL) {
            message = "";
        }
        strncpy(msg, message, MSG_SIZE - 1);
        // strncpy does not terminate the destination when the source fills
        // it, so the last byte is always set explicitly.
        msg[MSG_SIZE - 1] = '\0';
    }

    /** @return the stored, NUL-terminated message */
    const char *toString() const {
        return msg;
    }

 protected:
    char msg[MSG_SIZE];
};

#endif
diff --git a/dbg.h b/dbg.h new file mode 100644 index 0000000..85d10e7 --- /dev/null +++ b/dbg.h @@ -0,0 +1,18 @@ +#ifndef _DBG_H_ +#define _DBG_H_ + +#include + +#define cache_dbg(...); + +#define httpreq_dbg(...); + +#define mitm_dbg(...); + +//#define cache_dbg(...) do{printf(__VA_ARGS__);}while(0); + +#define httpreq_dbg(...) do{printf(__VA_ARGS__);}while(0); + +//#define mitm_dbg(...) do{printf(__VA_ARGS__);}while(0); + +#endif diff --git a/http_parser.c b/http_parser.c new file mode 100644 index 0000000..d788ad6 --- /dev/null +++ b/http_parser.c @@ -0,0 +1,1625 @@ +/* Copyright 2009,2010 Ryan Dahl + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include +#include + + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + + +#define CALLBACK2(FOR) \ +do { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser)) return (p - data); \ + } \ +} while (0) + + +#define MARK(FOR) \ +do { \ + FOR##_mark = p; \ +} while (0) + +#define CALLBACK_NOCLEAR(FOR) \ +do { \ + if (FOR##_mark) { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser, \ + FOR##_mark, \ + p - FOR##_mark)) \ + { \ + return (p - data); \ + } \ + } \ + } \ +} while (0) + + +#define CALLBACK(FOR) \ +do { \ + CALLBACK_NOCLEAR(FOR); \ + FOR##_mark = NULL; \ +} while (0) + + +#define PROXY_CONNECTION "proxy-connection" +#define CONNECTION "connection" +#define CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" +#define UPGRADE "upgrade" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + + +static const char *method_strings[] = + { "DELETE" + , "GET" + , "HEAD" + , "POST" + , "PUT" + , "CONNECT" + , "OPTIONS" + , "TRACE" + , "COPY" + , "LOCK" + , "MKCOL" + , "MOVE" + , "PROPFIND" + , "PROPPATCH" + , "UNLOCK" + , "REPORT" + , "MKACTIVITY" + , "CHECKOUT" + , "MERGE" + }; + + +/* ' ', '_', '-' and all alpha-numeric ascii characters are accepted by acceptable_header. + The 'A'-'Z' are lower-cased. */ +static const char acceptable_header[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + ' ', 0, 0, 0, 0, 0, 0, 0, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 0, 0, 0, 0, 0, '-', 0, 0, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, 0, '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, 0, 0, 0, 0 }; + + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1* + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" | "=" + * | "{" | "}" | SP | HT + */ +static const char tokens[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + ' ', '!', '"', '#', '$', '%', '&', '\'', +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 0, 0, '*', '+', 0, '-', '.', '/', +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, '^', '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, '|', '}', '~', 0 }; + + +static const int8_t unhex[256] = + {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + }; + + +static const uint8_t normal_url_char[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0, 1, 1, 0, 1, 1, 1, 1, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + 1, 1, 1, 1, 1, 1, 1, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1, 1, 1, 1, 1, 1, 1, 0 }; + + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_first_http_major + , s_res_http_major + , s_res_first_http_minor + , s_res_http_minor + , s_res_first_status_code + , s_res_status_code + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_host + , s_req_port + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_first_http_major + , s_req_http_major + , s_req_first_http_minor + , s_req_http_minor + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_start + , s_header_value + + , s_header_almost_done + + , s_headers_almost_done + /* Important: 's_headers_almost_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. 
+ */ + , s_chunk_size_start + , s_chunk_size + , s_chunk_size_almost_done + , s_chunk_parameters + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + }; + + +#define PARSING_HEADER(state) (state <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING)) + + +enum header_states + { h_general = 0 + , h_C + , h_CO + , h_CON + + , h_matching_connection + , h_matching_proxy_connection + , h_matching_content_length + , h_matching_transfer_encoding + , h_matching_upgrade + + , h_connection + , h_content_length + , h_transfer_encoding + , h_upgrade + + , h_matching_transfer_encoding_chunked + , h_matching_connection_keep_alive + , h_matching_connection_close + + , h_transfer_encoding_chunked + , h_connection_keep_alive + , h_connection_close + }; + + +enum flags + { F_CHUNKED = 1 << 0 + , F_CONNECTION_KEEP_ALIVE = 1 << 1 + , F_CONNECTION_CLOSE = 1 << 2 + , F_TRAILING = 1 << 3 + , F_UPGRADE = 1 << 4 + , F_SKIPBODY = 1 << 5 + }; + + +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define TOKEN(c) tokens[(unsigned char)c] + + +#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) + + +#if HTTP_PARSER_STRICT +# define STRICT_CHECK(cond) if (cond) goto error +# define NEW_MESSAGE() (http_should_keep_alive(parser) ? 
start_state : s_dead) +#else +# define STRICT_CHECK(cond) +# define NEW_MESSAGE() start_state +#endif + + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) +{ + char c, ch; + const char *p = data, *pe; + int64_t to_read; + + enum state state = (enum state) parser->state; + enum header_states header_state = (enum header_states) parser->header_state; + uint64_t index = parser->index; + uint64_t nread = parser->nread; + + if (len == 0) { + if (state == s_body_identity_eof) { + CALLBACK2(message_complete); + } + return 0; + } + + /* technically we could combine all of these (except for url_mark) into one + variable, saving stack space, but it seems more clear to have them + separated. */ + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *fragment_mark = 0; + const char *query_string_mark = 0; + const char *path_mark = 0; + const char *url_mark = 0; + + if (state == s_header_field) + header_field_mark = data; + if (state == s_header_value) + header_value_mark = data; + if (state == s_req_fragment) + fragment_mark = data; + if (state == s_req_query_string) + query_string_mark = data; + if (state == s_req_path) + path_mark = data; + if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash + || state == s_req_schema_slash_slash || state == s_req_port + || state == s_req_query_string_start || state == s_req_query_string + || state == s_req_host + || state == s_req_fragment_start || state == s_req_fragment) + url_mark = data; + + for (p=data, pe=data+len; p != pe; p++) { + ch = *p; + + if (PARSING_HEADER(state)) { + ++nread; + /* Buffer overflow attack */ + if (nread > HTTP_MAX_HEADER_SIZE) goto error; + } + + switch (state) { + + case s_dead: + /* this state is used after a 'Connection: close' message + * the parser will error out if it reads another message + */ + goto error; + + case s_start_req_or_res: + { + if (ch == CR || ch == LF) + 
break; + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + if (ch == 'H') + state = s_res_or_resp_H; + else { + parser->type = HTTP_REQUEST; + goto start_req_method_assign; + } + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + state = s_res_HT; + } else { + if (ch != 'E') goto error; + parser->type = HTTP_REQUEST; + parser->method = HTTP_HEAD; + index = 2; + state = s_req_method; + } + break; + + case s_start_res: + { + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + switch (ch) { + case 'H': + state = s_res_H; + break; + + case CR: + case LF: + break; + + default: + goto error; + } + break; + } + + case s_res_H: + STRICT_CHECK(ch != 'T'); + state = s_res_HT; + break; + + case s_res_HT: + STRICT_CHECK(ch != 'T'); + state = s_res_HTT; + break; + + case s_res_HTT: + STRICT_CHECK(ch != 'P'); + state = s_res_HTTP; + break; + + case s_res_HTTP: + STRICT_CHECK(ch != '/'); + state = s_res_first_http_major; + break; + + case s_res_first_http_major: + if (ch < '1' || ch > '9') goto error; + parser->http_major = ch - '0'; + state = s_res_http_major; + break; + + /* major HTTP version or dot */ + case s_res_http_major: + { + if (ch == '.') { + state = s_res_first_http_minor; + break; + } + + if (ch < '0' || ch > '9') goto error; + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) goto error; + break; + } + + /* first digit of minor HTTP version */ + case s_res_first_http_minor: + if (ch < '0' || ch > '9') goto error; + parser->http_minor = ch - '0'; + state = s_res_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_res_http_minor: + { + if (ch == ' ') { + state = s_res_first_status_code; + break; + } + + if (ch < '0' || ch > '9') goto error; + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) goto error; + break; + } + + case 
s_res_first_status_code: + { + if (ch < '0' || ch > '9') { + if (ch == ' ') { + break; + } + goto error; + } + parser->status_code = ch - '0'; + state = s_res_status_code; + break; + } + + case s_res_status_code: + { + if (ch < '0' || ch > '9') { + switch (ch) { + case ' ': + state = s_res_status; + break; + case CR: + state = s_res_line_almost_done; + break; + case LF: + state = s_header_field_start; + break; + default: + goto error; + } + break; + } + + parser->status_code *= 10; + parser->status_code += ch - '0'; + + if (parser->status_code > 999) goto error; + break; + } + + case s_res_status: + /* the human readable status. e.g. "NOT FOUND" + * we are not humans so just ignore this */ + if (ch == CR) { + state = s_res_line_almost_done; + break; + } + + if (ch == LF) { + state = s_header_field_start; + break; + } + break; + + case s_res_line_almost_done: + STRICT_CHECK(ch != LF); + state = s_header_field_start; + break; + + case s_start_req: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + if (ch < 'A' || 'Z' < ch) goto error; + + start_req_method_assign: + parser->method = (enum http_method) 0; + index = 1; + switch (ch) { + case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; + case 'D': parser->method = HTTP_DELETE; break; + case 'G': parser->method = HTTP_GET; break; + case 'H': parser->method = HTTP_HEAD; break; + case 'L': parser->method = HTTP_LOCK; break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE */ break; + case 'O': parser->method = HTTP_OPTIONS; break; + case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break; + case 'R': parser->method = HTTP_REPORT; break; + case 'T': parser->method = HTTP_TRACE; break; + case 'U': parser->method = HTTP_UNLOCK; break; + default: goto error; + } + state = s_req_method; + break; + } + + case s_req_method: + { + if (ch == '\0') + goto error; + + const char *matcher = 
method_strings[parser->method]; + if (ch == ' ' && matcher[index] == '\0') { + state = s_req_spaces_before_url; + } else if (ch == matcher[index]) { + ; /* nada */ + } else if (parser->method == HTTP_CONNECT) { + if (index == 1 && ch == 'H') { + parser->method = HTTP_CHECKOUT; + } else if (index == 2 && ch == 'P') { + parser->method = HTTP_COPY; + } + } else if (parser->method == HTTP_MKCOL) { + if (index == 1 && ch == 'O') { + parser->method = HTTP_MOVE; + } else if (index == 1 && ch == 'E') { + parser->method = HTTP_MERGE; + } else if (index == 2 && ch == 'A') { + parser->method = HTTP_MKACTIVITY; + } + } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') { + parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ + } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') { + parser->method = HTTP_PUT; + } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { + parser->method = HTTP_PROPPATCH; + } else { + goto error; + } + + ++index; + break; + } + case s_req_spaces_before_url: + { + if (ch == ' ') break; + + if (ch == '/') { + MARK(url); + MARK(path); + state = s_req_path; + break; + } + + c = LOWER(ch); + + if (c >= 'a' && c <= 'z') { + MARK(url); + state = s_req_schema; + break; + } + + goto error; + } + + case s_req_schema: + { + c = LOWER(ch); + + if (c >= 'a' && c <= 'z') break; + if (c >= '0' && c <= '9') break; + if (c == '-') break; + + if (ch == ':') { + state = s_req_schema_slash; + break; + } else if (ch == '.') { + state = s_req_host; + break; + } + + goto error; + } + + case s_req_schema_slash: + STRICT_CHECK(ch != '/'); + state = s_req_schema_slash_slash; + break; + + case s_req_schema_slash_slash: + STRICT_CHECK(ch != '/'); + state = s_req_host; + break; + + case s_req_host: + { + c = LOWER(ch); + if (c >= 'a' && c <= 'z') break; + if ((ch >= '0' && ch <= '9') || ch == '.' 
|| ch == '-') break; + switch (ch) { + case ':': + state = s_req_port; + break; + case '/': + MARK(path); + state = s_req_path; + break; + case ' ': + /* The request line looks like: + * "GET http://foo.bar.com HTTP/1.1" + * That is, there is no path. + */ + CALLBACK(url); + state = s_req_http_start; + break; + default: + goto error; + } + break; + } + + case s_req_port: + { + if (ch >= '0' && ch <= '9') break; + switch (ch) { + case '/': + MARK(path); + state = s_req_path; + break; + case ' ': + /* The request line looks like: + * "GET http://foo.bar.com:1234 HTTP/1.1" + * That is, there is no path. + */ + CALLBACK(url); + state = s_req_http_start; + break; + default: + goto error; + } + break; + } + + case s_req_path: + { + if (normal_url_char[(unsigned char)ch]) break; + + switch (ch) { + case ' ': + CALLBACK(url); + CALLBACK(path); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + CALLBACK(path); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + CALLBACK(path); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + CALLBACK(path); + state = s_req_query_string_start; + break; + case '#': + CALLBACK(path); + state = s_req_fragment_start; + break; + default: + goto error; + } + break; + } + + case s_req_query_string_start: + { + if (normal_url_char[(unsigned char)ch]) { + MARK(query_string); + state = s_req_query_string; + break; + } + + switch (ch) { + case '?': + break; /* XXX ignore extra '?' ... is this right? 
*/ + case ' ': + CALLBACK(url); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '#': + state = s_req_fragment_start; + break; + default: + goto error; + } + break; + } + + case s_req_query_string: + { + if (normal_url_char[(unsigned char)ch]) break; + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + break; + case ' ': + CALLBACK(url); + CALLBACK(query_string); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + CALLBACK(query_string); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + CALLBACK(query_string); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '#': + CALLBACK(query_string); + state = s_req_fragment_start; + break; + default: + goto error; + } + break; + } + + case s_req_fragment_start: + { + if (normal_url_char[(unsigned char)ch]) { + MARK(fragment); + state = s_req_fragment; + break; + } + + switch (ch) { + case ' ': + CALLBACK(url); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + MARK(fragment); + state = s_req_fragment; + break; + case '#': + break; + default: + goto error; + } + break; + } + + case s_req_fragment: + { + if (normal_url_char[(unsigned char)ch]) break; + + switch (ch) { + case ' ': + CALLBACK(url); + CALLBACK(fragment); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + CALLBACK(fragment); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + CALLBACK(fragment); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + case '#': + break; + default: + goto error; + } + 
break; + } + + case s_req_http_start: + switch (ch) { + case 'H': + state = s_req_http_H; + break; + case ' ': + break; + default: + goto error; + } + break; + + case s_req_http_H: + STRICT_CHECK(ch != 'T'); + state = s_req_http_HT; + break; + + case s_req_http_HT: + STRICT_CHECK(ch != 'T'); + state = s_req_http_HTT; + break; + + case s_req_http_HTT: + STRICT_CHECK(ch != 'P'); + state = s_req_http_HTTP; + break; + + case s_req_http_HTTP: + STRICT_CHECK(ch != '/'); + state = s_req_first_http_major; + break; + + /* first digit of major HTTP version */ + case s_req_first_http_major: + if (ch < '1' || ch > '9') goto error; + parser->http_major = ch - '0'; + state = s_req_http_major; + break; + + /* major HTTP version or dot */ + case s_req_http_major: + { + if (ch == '.') { + state = s_req_first_http_minor; + break; + } + + if (ch < '0' || ch > '9') goto error; + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) goto error; + break; + } + + /* first digit of minor HTTP version */ + case s_req_first_http_minor: + if (ch < '0' || ch > '9') goto error; + parser->http_minor = ch - '0'; + state = s_req_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_req_http_minor: + { + if (ch == CR) { + state = s_req_line_almost_done; + break; + } + + if (ch == LF) { + state = s_header_field_start; + break; + } + + /* XXX allow spaces after digit? 
*/ + + if (ch < '0' || ch > '9') goto error; + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) goto error; + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (ch != LF) goto error; + state = s_header_field_start; + break; + } + + case s_header_field_start: + { + if (ch == CR) { + state = s_headers_almost_done; + break; + } + + if (ch == LF) { + /* they might be just sending \n instead of \r\n so this would be + * the second \n to denote the end of headers*/ + state = s_headers_almost_done; + goto headers_almost_done; + } + + c = TOKEN(ch); + + if (!c) goto error; + + MARK(header_field); + + index = 0; + state = s_header_field; + + switch (c) { + case 'c': + header_state = h_C; + break; + + case 'p': + header_state = h_matching_proxy_connection; + break; + + case 't': + header_state = h_matching_transfer_encoding; + break; + + case 'u': + header_state = h_matching_upgrade; + break; + + default: + header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + c = TOKEN(ch); + + if (c) { + switch (header_state) { + case h_general: + break; + + case h_C: + index++; + header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + index++; + header_state = (c == 'n' ? 
h_CON : h_general); + break; + + case h_CON: + index++; + switch (c) { + case 'n': + header_state = h_matching_connection; + break; + case 't': + header_state = h_matching_content_length; + break; + default: + header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + index++; + if (index > sizeof(CONNECTION)-1 + || c != CONNECTION[index]) { + header_state = h_general; + } else if (index == sizeof(CONNECTION)-2) { + header_state = h_connection; + } + break; + + /* proxy-connection */ + + case h_matching_proxy_connection: + index++; + if (index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[index]) { + header_state = h_general; + } else if (index == sizeof(PROXY_CONNECTION)-2) { + header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + index++; + if (index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[index]) { + header_state = h_general; + } else if (index == sizeof(CONTENT_LENGTH)-2) { + header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + index++; + if (index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[index]) { + header_state = h_general; + } else if (index == sizeof(TRANSFER_ENCODING)-2) { + header_state = h_transfer_encoding; + } + break; + + /* upgrade */ + + case h_matching_upgrade: + index++; + if (index > sizeof(UPGRADE)-1 + || c != UPGRADE[index]) { + header_state = h_general; + } else if (index == sizeof(UPGRADE)-2) { + header_state = h_upgrade; + } + break; + + case h_connection: + case h_content_length: + case h_transfer_encoding: + case h_upgrade: + if (ch != ' ') header_state = h_general; + break; + + default: + assert(0 && "Unknown header_state"); + break; + } + break; + } + + if (ch == ':') { + CALLBACK(header_field); + state = s_header_value_start; + break; + } + + if (ch == CR) { + state = s_header_almost_done; + CALLBACK(header_field); + break; + } + + if (ch == 
LF) { + CALLBACK(header_field); + state = s_header_field_start; + break; + } + + goto error; + } + + case s_header_value_start: + { + if (ch == ' ') break; + + MARK(header_value); + + state = s_header_value; + index = 0; + + c = acceptable_header[(unsigned char)ch]; + + if (!c) { + if (ch == CR) { + CALLBACK(header_value); + header_state = h_general; + state = s_header_almost_done; + break; + } + + if (ch == LF) { + CALLBACK(header_value); + state = s_header_field_start; + break; + } + + header_state = h_general; + break; + } + + switch (header_state) { + case h_upgrade: + parser->flags |= F_UPGRADE; + header_state = h_general; + break; + + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + header_state = h_matching_transfer_encoding_chunked; + } else { + header_state = h_general; + } + break; + + case h_content_length: + if (ch < '0' || ch > '9') goto error; + parser->content_length = ch - '0'; + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + header_state = h_matching_connection_close; + } else { + header_state = h_general; + } + break; + + default: + header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + c = acceptable_header[(unsigned char)ch]; + + if (!c) { + if (ch == CR) { + CALLBACK(header_value); + state = s_header_almost_done; + break; + } + + if (ch == LF) { + CALLBACK(header_value); + goto header_almost_done; + } + break; + } + + switch (header_state) { + case h_general: + break; + + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; + + case h_content_length: + if (ch == ' ') break; + if (ch < '0' || ch > '9') goto error; + parser->content_length *= 10; + parser->content_length += ch - '0'; + break; + + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_chunked: 
+ index++; + if (index > sizeof(CHUNKED)-1 + || c != CHUNKED[index]) { + header_state = h_general; + } else if (index == sizeof(CHUNKED)-2) { + header_state = h_transfer_encoding_chunked; + } + break; + + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + index++; + if (index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[index]) { + header_state = h_general; + } else if (index == sizeof(KEEP_ALIVE)-2) { + header_state = h_connection_keep_alive; + } + break; + + /* looking for 'Connection: close' */ + case h_matching_connection_close: + index++; + if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) { + header_state = h_general; + } else if (index == sizeof(CLOSE)-2) { + header_state = h_connection_close; + } + break; + + case h_transfer_encoding_chunked: + case h_connection_keep_alive: + case h_connection_close: + if (ch != ' ') header_state = h_general; + break; + + default: + state = s_header_value; + header_state = h_general; + break; + } + break; + } + + case s_header_almost_done: + header_almost_done: + { + STRICT_CHECK(ch != LF); + + state = s_header_field_start; + + switch (header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } + break; + } + + case s_headers_almost_done: + headers_almost_done: + { + STRICT_CHECK(ch != LF); + + if (parser->flags & F_TRAILING) { + /* End of a chunked request */ + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + break; + } + + nread = 0; + + if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) { + parser->upgrade = 1; + } + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. 
This + * is needed for the annoying case of receiving a response to a HEAD + * request. + */ + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(parser)) { + case 0: + break; + + case 1: + parser->flags |= F_SKIPBODY; + break; + + default: + return p - data; /* Error */ + } + } + + /* Exit, the rest of the connection is in a different protocol. */ + if (parser->upgrade) { + CALLBACK2(message_complete); + return (p - data); + } + + if (parser->flags & F_SKIPBODY) { + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header */ + state = s_chunk_size_start; + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } else if (parser->content_length > 0) { + /* Content-Length header given and non-zero */ + state = s_body_identity; + } else { + if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) { + /* Assume content-length 0 - read the next */ + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } else { + /* Read body until EOF */ + state = s_body_identity_eof; + } + } + } + + break; + } + + case s_body_identity: + to_read = MIN(pe - p, (int64_t)parser->content_length); + if (to_read > 0) { + if (settings->on_body) settings->on_body(parser, p, to_read); + p += to_read - 1; + parser->content_length -= to_read; + if (parser->content_length == 0) { + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } + } + break; + + /* read until EOF */ + case s_body_identity_eof: + to_read = pe - p; + if (to_read > 0) { + if (settings->on_body) settings->on_body(parser, p, to_read); + p += to_read - 1; + } + break; + + case s_chunk_size_start: + { + assert(parser->flags & F_CHUNKED); + + c = unhex[(unsigned char)ch]; + if (c == -1) goto error; + parser->content_length = c; + state = s_chunk_size; + break; + } + + case s_chunk_size:
+ { + assert(parser->flags & F_CHUNKED); + + if (ch == CR) { + state = s_chunk_size_almost_done; + break; + } + + c = unhex[(unsigned char)ch]; + + if (c == -1) { + if (ch == ';' || ch == ' ') { + state = s_chunk_parameters; + break; + } + goto error; + } + + parser->content_length *= 16; + parser->content_length += c; + break; + } + + case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); + /* just ignore this shit. TODO check for overflow */ + if (ch == CR) { + state = s_chunk_size_almost_done; + break; + } + break; + } + + case s_chunk_size_almost_done: + { + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + + if (parser->content_length == 0) { + parser->flags |= F_TRAILING; + state = s_header_field_start; + } else { + state = s_chunk_data; + } + break; + } + + case s_chunk_data: + { + assert(parser->flags & F_CHUNKED); + + to_read = MIN(pe - p, (int64_t)(parser->content_length)); + + if (to_read > 0) { + if (settings->on_body) settings->on_body(parser, p, to_read); + p += to_read - 1; + } + + if (to_read == parser->content_length) { + state = s_chunk_data_almost_done; + } + + parser->content_length -= to_read; + break; + } + + case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != CR); + state = s_chunk_data_done; + break; + + case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + state = s_chunk_size_start; + break; + + default: + assert(0 && "unhandled state"); + goto error; + } + } + + CALLBACK_NOCLEAR(header_field); + CALLBACK_NOCLEAR(header_value); + CALLBACK_NOCLEAR(fragment); + CALLBACK_NOCLEAR(query_string); + CALLBACK_NOCLEAR(path); + CALLBACK_NOCLEAR(url); + + parser->state = state; + parser->header_state = header_state; + parser->index = index; + parser->nread = nread; + + return len; + +error: + parser->state = s_dead; + return (p - data); +} + + +int +http_should_keep_alive (http_parser *parser) +{ + if (parser->http_major > 0 && parser->http_minor > 0) { + 
/* HTTP/1.1 */ + if (parser->flags & F_CONNECTION_CLOSE) { + return 0; + } else { + return 1; + } + } else { + /* HTTP/1.0 or earlier */ + if (parser->flags & F_CONNECTION_KEEP_ALIVE) { + return 1; + } else { + return 0; + } + } +} + + +/* Returns the method name stored in method_strings for m. */ +const char * http_method_str (enum http_method m) +{ + return method_strings[m]; +} + + +/* Prepares parser for a stream of type t (HTTP_REQUEST, HTTP_RESPONSE, or + * HTTP_BOTH to detect the type from the first message). Every private and + * read-only field is given a defined value so that none of them is read + * uninitialized, e.g. http_major by http_should_keep_alive() after an + * HTTP/0.9 request line, which only sets http_minor. The user-owned data + * pointer is left untouched. */ +void +http_parser_init (http_parser *parser, enum http_parser_type t) +{ + parser->type = t; + parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res)); + parser->header_state = h_general; + parser->index = 0; + parser->nread = 0; + parser->content_length = -1; /* same "no Content-Length seen" value the start states assign */ + parser->http_major = 0; + parser->http_minor = 0; + parser->status_code = 0; + parser->upgrade = 0; + parser->flags = 0; + parser->method = 0; +} diff --git a/http_parser.h b/http_parser.h new file mode 100644 index 0000000..0a76550 --- /dev/null +++ b/http_parser.h @@ -0,0 +1,176 @@ +/* Copyright 2009,2010 Ryan Dahl + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ */ +#ifndef http_parser_h +#define http_parser_h +#ifdef __cplusplus +extern "C" { +#endif + + +#include +#ifdef _WIN32 + typedef __int8 int8_t; + typedef unsigned __int8 uint8_t; + typedef __int16 int16_t; + typedef unsigned __int16 uint16_t; + typedef __int32 int32_t; + typedef unsigned __int32 uint32_t; + typedef __int64 int64_t; + typedef unsigned __int64 uint64_t; + + typedef unsigned int size_t; + typedef int ssize_t; +#else +#include +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#else +# define HTTP_PARSER_STRICT 0 +#endif + + +/* Maximium header size allowed */ +#define HTTP_MAX_HEADER_SIZE (80*1024) + + + typedef struct http_parser http_parser; + typedef struct http_parser_settings http_parser_settings; + + +/* Callbacks should return non-zero to indicate an error. The parser will + * then halt execution. + * + * The one exception is on_headers_complete. In a HTTP_RESPONSE parser + * returning '1' from on_headers_complete will tell the parser that it + * should not expect a body. This is used when receiving a response to a + * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: + * chunked' headers that indicate the presence of a body. + * + * http_data_cb does not return data chunks. It will be call arbitrarally + * many times for each string. E.G. you might get 10 callbacks for "on_path" + * each providing just a few characters more data. 
+ */ + typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); + typedef int (*http_cb) (http_parser*); + + +/* Request Methods */ + enum http_method + { HTTP_DELETE = 0 + , HTTP_GET + , HTTP_HEAD + , HTTP_POST + , HTTP_PUT + /* pathological */ + , HTTP_CONNECT + , HTTP_OPTIONS + , HTTP_TRACE + /* webdav */ + , HTTP_COPY + , HTTP_LOCK + , HTTP_MKCOL + , HTTP_MOVE + , HTTP_PROPFIND + , HTTP_PROPPATCH + , HTTP_UNLOCK + /* subversion */ + , HTTP_REPORT + , HTTP_MKACTIVITY + , HTTP_CHECKOUT + , HTTP_MERGE + }; + + + enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; + + + struct http_parser { + /** PRIVATE **/ + unsigned char type : 2; + unsigned char flags : 6; + unsigned char state; + unsigned char header_state; + unsigned char index; + + uint32_t nread; + int64_t content_length; + + /** READ-ONLY **/ + unsigned short http_major; + unsigned short http_minor; + unsigned short status_code; /* responses only */ + unsigned char method; /* requests only */ + + /* 1 = Upgrade header was present and the parser has exited because of that. + * 0 = No upgrade header present. + * Should be checked when http_parser_execute() returns in addition to + * error checking. 
+ */ + char upgrade; + + /** PUBLIC **/ + void *data; /* User pointer, e.g. to the "connection" or "socket" object that owns this parser */ + }; + + + /* Callback table passed to http_parser_execute(). on_headers_complete and + * on_body are only invoked when non-NULL. */ + struct http_parser_settings { + http_cb on_message_begin; + http_data_cb on_path; + http_data_cb on_query_string; + http_data_cb on_url; + http_data_cb on_fragment; + http_data_cb on_header_field; + http_data_cb on_header_value; + http_cb on_headers_complete; + http_data_cb on_body; + http_cb on_message_complete; + }; + + + /* Puts parser in the start state for the given type; must be called before + * the first http_parser_execute(). */ + void http_parser_init(http_parser *parser, enum http_parser_type type); + + + /* Parses len bytes of data. Returns len when everything was consumed, or the + * offset at which parsing stopped on error. Call with len == 0 to signal + * EOF. */ + size_t http_parser_execute(http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len); + + +/* If http_should_keep_alive() in the on_headers_complete or + * on_message_complete callback returns 0, then this should be + * the last message on the connection. + * If you are the server, respond with the "Connection: close" header. + * If you are the client, close the connection. + */ + int http_should_keep_alive(http_parser *parser); + +/* Returns a string version of the HTTP method. */ + const char *http_method_str(enum http_method); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..b17c904 --- /dev/null +++ b/main.cpp @@ -0,0 +1,263 @@ +/*======================================================== +** University of Illinois/NCSA +** Open Source License +** +** Copyright (C) 2011,The Board of Trustees of the University of +** Illinois. All rights reserved.
+** +** Developed by: +** +** Research Group of Professor Sam King in the Department of Computer +** Science The University of Illinois at Urbana-Champaign +** http://www.cs.uiuc.edu/homes/kingst/Research.html +** +** Copyright (C) Sam King +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and associated documentation files (the +** Software), to deal with the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimers. +** +** Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimers in the +** documentation and/or other materials provided with the distribution. +** Neither the names of Sam King or the University of Illinois, +** nor the names of its contributors may be used to endorse or promote +** products derived from this Software without specific prior written +** permission. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
+**========================================================== +*/ + +#include +#include + +#include +#include +#include +#include +#include + +#include "MyServerSocket.h" +#include "HTTPRequest.h" +#include "Cache.h" +#include "dbg.h" +#include "time.h" + +using namespace std; + +int serverPorts[] = {8808, 8809, 8810}; +#define NUM_SERVERS (sizeof(serverPorts) / sizeof(serverPorts[0])) + +static string CONNECT_REPLY = "HTTP/1.1 200 Connection Established\r\n\r\n"; + +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static unsigned long numThreads = 0; + +struct client_struct { + MySocket *sock; + int serverPort; + queue *killQueue; +}; + +struct server_struct { + int serverPort; +}; + +pthread_t server_threads[NUM_SERVERS]; +static int gVOTING = 0; + +void run_client(MySocket *sock, int serverPort) +{ + HTTPRequest *request = new HTTPRequest(sock, serverPort); + +// httpreq_dbg("%d: ", serverPort); + if(!request->readRequest()) { + cout << "did not read request" << endl; + } else { + bool error = false; + bool isSSL = false; + + string host = request->getHost(); + string url = request->getUrl(); + + MySocket *replySock = NULL; + + if(request->isConnect()) { +// cerr << serverPort << " connect request for " << host << " " << url << endl; + if(!sock->write_bytes(CONNECT_REPLY)) { + error = true; + } else { + delete request; + replySock = cache()->getReplySocket(host, true); + //need proxy <--> remotesite socket for information needed to fake a certificate + sock->enableSSLServer(replySock); + isSSL = true; + request = new HTTPRequest(sock, serverPort); + if(!request->readRequest()) { + error = true; + } + } + } else + replySock = cache()->getReplySocket(host, false); + + if(!error) { + string req = request->getRequest(); + if(gVOTING == 0) { + cache()->getHTTPResponseNoVote(host, req, url, serverPort, sock, isSSL, replySock); + } else { +// if(isSSL == true) +// cache()->getHTTPResponseVote(host, req, url, serverPort, sock, isSSL, replySock); +// else + 
cache()->getHTTPResponseVote(host, req, url, serverPort, sock, isSSL, replySock); + } + + } + } + + sock->close(); + delete request; +} + +void *client_thread(void *arg) +{ + struct client_struct *cs = (struct client_struct *) arg; + MySocket *sock = cs->sock; + int serverPort = cs->serverPort; + queue *killQueue = cs->killQueue; + + delete cs; + + pthread_mutex_lock(&mutex); + numThreads++; + //cout << "numThread = " << numThreads << endl; + pthread_mutex_unlock(&mutex); + + run_client(sock, serverPort); + + pthread_mutex_lock(&mutex); + numThreads--; + //cout << "numThread = " << numThreads << endl; + + // This is a hack because linux is having trouble freeing memory + // in a different thread, so instead we will let the server thread + // free this memory + killQueue->push(sock); + pthread_mutex_unlock(&mutex); + + return NULL; +} + +void start_client(MySocket *sock, int serverPort, queue *killQueue) +{ + struct client_struct *cs = new struct client_struct; + cs->sock = sock; + cs->serverPort = serverPort; + cs->killQueue = killQueue; + + pthread_t tid; + int ret = pthread_create(&tid, NULL, client_thread, cs); + assert(ret == 0); + ret = pthread_detach(tid); + assert(ret == 0); +} + +void *server_thread(void *arg) +{ + struct server_struct *ss = (struct server_struct *)arg; + int port = ss->serverPort; + delete ss; + + MyServerSocket *server = new MyServerSocket(port); + assert(server != NULL); + MySocket *client; + queue killQueue; + while(true) { + try { + client = server->accept(); + } catch(MySocketException e) { + cerr << e.toString() << endl; + exit(1); + } + pthread_mutex_lock(&mutex); + while(killQueue.size() > 0) { + delete killQueue.front(); + killQueue.pop(); + } + pthread_mutex_unlock(&mutex); + start_client(client, port, &killQueue); + } + return NULL; +} + + +pthread_t start_server(int port) +{ + cerr << "starting server on port " << port << endl; + server_struct *ss = new struct server_struct; + ss->serverPort = port; + pthread_t tid; + int ret = 
pthread_create(&tid, NULL, server_thread, ss); + assert(ret == 0); + return tid; +} + +static void get_opts(int argc, char *argv[]) +{ + int c; + while((c = getopt(argc, argv, "v")) != EOF) { + switch(c) { + case 'v': + gVOTING = 1; + break; + default: + cerr << "Wrong Argument." << endl; + exit(1); + break; + } + } +} +int main(int argc, char *argv[]) +{ + //if started with "-v" option, voting will be enabled. Otherwise, just a plain + //proxy + get_opts(argc, argv); + // get socket write errors from write call + signal(SIGPIPE, SIG_IGN); + + // initialize ssl library + SSL_load_error_strings(); + SSL_library_init(); + + cout << "number of servers: " << NUM_SERVERS << endl; + + //when generating serial number for X509, need random number + srand(time(NULL)); + Cache::setNumBrowsers(NUM_SERVERS); + + pthread_t tid; + int ret; + for(unsigned int idx = 0; idx < NUM_SERVERS; idx++) { + tid = start_server(serverPorts[idx]); + server_threads[idx] = tid; + } + + for(unsigned int idx = 0; idx < NUM_SERVERS; idx++) { + ret = pthread_join(server_threads[idx], NULL); + assert(ret == 0); + } + return 0; +}