-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathTokenizer.cpp
52 lines (42 loc) · 1.21 KB
/
Tokenizer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#include "Tokenizer.h"
#include <boost/regex.hpp>
#include <cassert>
#include <sstream>
#include <vector>
using namespace bencode;
void Tokenizer::tokenize(const std::string& encoded,
std::deque<std::string>& tokens)
{
boost::regex e("([idel])|(\\d+):|(-?\\d+)");
boost::match_results<std::string::const_iterator> what;
boost::match_flag_type flags = boost::match_extra;
int i = 0;
while (i < static_cast<int>(encoded.size())) {
std::string current = encoded.substr(i, encoded.size() - i);
if(boost::regex_search(current, what, e, flags)) {
assert(what.size() == 4);
if (!what[2].str().empty()) {
// Found a string
std::istringstream stream(what[2].str());
unsigned int size = what[2].str().size() + 1; // + 1 for the colon
unsigned int value;
stream >> value;
tokens.push_back("s");
if (size + value > current.size()) {
tokens.clear();
throw std::invalid_argument("Incorrectly sized string");
}
tokens.push_back(current.substr(size, value));
i += value + size;
}
else {
tokens.push_back(what[0].str());
i += what[0].str().size();
}
}
else {
tokens.clear();
throw std::invalid_argument("Invalid formatted input.");
}
}
}