Skip to content

Commit

Permalink
Merge pull request #69 from FlameOfIgnis/dev-ata
Browse files Browse the repository at this point in the history
Performance updates
  • Loading branch information
ignis-sec authored Jun 10, 2021
2 parents 361ccaa + 588b223 commit ad6f41b
Show file tree
Hide file tree
Showing 8 changed files with 201 additions and 153 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ PYTHON_VERSION3 :=$(shell python$(PYTHON_VERSION) -c "import sys;t='{v[0]}.{v[1]
##################################### MarkovPassword project options #################################
##############################################################################################################

MP_C_FLAGS := -Wall -Wextra -g
MP_C_FLAGS := -Wall -Wextra -g -Ofast
MP_EXEC := Markov
MP_SRC := $(shell find ./MarkovPasswords/src/ -name '*.cpp')
MP_INC :=
Expand All @@ -44,7 +44,7 @@ MM_SRC := $(shell find $(MM_SRC_DIR) -name '*.cpp')
MM_OBJS := $(MM_SRC:%=$(BIN)/%.o)
MM_DEPS := $(MM_OBJS:.o=.d)
MM_LDFLAGS := -shared
MM_C_FLAGS := $(MM_INC_FLAGS) -MMD -MP
MM_C_FLAGS := $(MM_INC_FLAGS) -MMD -MP -Ofast
MM_INC_DIRS := $(shell find $(MM_SRC_DIR) -type d)
MM_INC_FLAGS := $(addprefix -I,$(MM_INC_DIRS))
MM_LIB := model.so
Expand All @@ -69,7 +69,7 @@ MPY_SRC_DIR := Markopy/src/
MPY_OBJS := $(MPY_SRC:%=$(BIN)/%.o)
MPY_DEPS := $(MPY_OBJS:.o=.d)
MPY_LDFLAGS := -shared -lboost_python$(PYTHON_VERSION_) -lpython$(PYTHON_VERSION) -lpthread
MPY_C_FLAGS := $(MPY_INC_FLAGS) -MMD -MP -fPIC -I/usr/include/python$(PYTHON_VERSION)
MPY_C_FLAGS := $(MPY_INC_FLAGS) -MMD -MP -fPIC -I/usr/include/python$(PYTHON_VERSION) -Ofast
MPY_INC_DIRS := $(shell find $(MPY_SRC_DIR) -type d) $(shell pwd)
MPY_INC_FLAGS := $(addprefix -I,$(MPY_INC_DIRS))
MPY_SO := markopy.so
Expand Down
118 changes: 75 additions & 43 deletions Markopy/src/CLI/markopy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@

parser = argparse.ArgumentParser(description="Python wrapper for MarkovPasswords.",
epilog=f"""Sample runs:
{__file__} train -i untrained.mdl -d dataset.dat -s "\\t" -o trained.mdl
{__file__} train untrained.mdl -d dataset.dat -s "\\t" -o trained.mdl
Import untrained.mdl, train it with dataset.dat which has tab delimited data, output resulting model to trained.mdl\n
{__file__} generate -i trained.mdl -n 500 -w output.txt
{__file__} generate trained.mdl -n 500 -w output.txt
Import trained.mdl, and generate 500 lines to output.txt
{__file__} combine -i untrained.mdl -d dataset.dat -s "\\t" -n 500 -w output.txt
{__file__} combine untrained.mdl -d dataset.dat -s "\\t" -n 500 -w output.txt
Train and immediately generate 500 lines to output.txt. Do not export trained model.
{__file__} combine -i untrained.mdl -d dataset.dat -s "\\t" -n 500 -w output.txt -o trained.mdl
{__file__} combine untrained.mdl -d dataset.dat -s "\\t" -n 500 -w output.txt -o trained.mdl
Train and immediately generate 500 lines to output.txt. Export trained model.
""", formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("mode", help="Operation mode, supported modes: \"generate\", \"train\" and \"combine\".")
Expand All @@ -29,12 +29,13 @@
parser.add_argument("-n", "--count", help="Number of lines to generate. Ignored in training mode.")
# Thread count is forwarded to both model.Train and model.Generate, so it applies in every mode.
parser.add_argument("-t", "--threads",default=10, help="Number of threads to use for training and generation.")
parser.add_argument("-v", "--verbosity",action="count", help="Output verbosity.")
parser.add_argument("-b", "--bulk",action="store_true", help="Bulk generate or bulk train every corpus/model in the folder.")
args = parser.parse_args()




def cli_init():
def cli_init(input_model):
logging.VERBOSITY = 0
if args.verbosity:
logging.VERBOSITY = args.verbosity
Expand All @@ -46,72 +47,103 @@ def cli_init():

logging.pprint("Importing model file.", 1)

if(not os.path.isfile(args.input)):
logging.pprint(f"Model file at {args.input} not found. Check the file path, or working directory")
if(not os.path.isfile(input_model)):
logging.pprint(f"Model file at {input_model} not found. Check the file path, or working directory")
exit(1)

model.Import(args.input)
model.Import(input_model)
logging.pprint("Model imported successfully.", 2)
return model

def cli_train(model, output_forced=False):
if not (args.dataset and args.seperator and (args.output or not output_forced)):
def cli_train(model, dataset, seperator, output, output_forced=False, bulk=False):
if not (dataset and seperator and (output or not output_forced)):
logging.pprint(f"Training mode requires -d/--dataset{', -o/--output' if output_forced else''} and -s/--seperator parameters. Exiting.")
exit(2)

if(not os.path.isfile(args.dataset)):
logging.pprint(f"{args.dataset} doesn't exists. Check the file path, or working directory")
if(not bulk and not os.path.isfile(dataset)):
logging.pprint(f"{dataset} doesn't exists. Check the file path, or working directory")
exit(3)

if(args.output and os.path.isfile(args.output)):
logging.pprint(f"{args.output} exists and will be overwritten.",1 )
if(output and os.path.isfile(output)):
logging.pprint(f"{output} exists and will be overwritten.",1 )

if(args.seperator == '\\t'):
if(seperator == '\\t'):
logging.pprint("Escaping seperator.", 3)
args.seperator = '\t'
seperator = '\t'

if(len(args.seperator)!=1):
logging.pprint(f'Delimiter must be a single character, and "{args.seperator}" is not accepted.')
if(len(seperator)!=1):
logging.pprint(f'Delimiter must be a single character, and "{seperator}" is not accepted.')
exit(4)

logging.pprint(f'Starting training.', 3)
model.Train(args.dataset,args.seperator, int(args.threads))
model.Train(dataset,seperator, int(args.threads))
logging.pprint(f'Training completed.', 2)

if(args.output):
logging.pprint(f'Exporting model.', 2)
model.Export(args.output)
if(output):
logging.pprint(f'Exporting model to {output}', 2)
model.Export(output)
else:
logging.pprint(f'Model will not be exported.', 1)

def cli_generate(model):
if not (args.wordlist and args.count):
def cli_generate(model, wordlist, bulk=False):
if not (wordlist or args.count):
logging.pprint("Generation mode requires -w/--wordlist and -n/--count parameters. Exiting.")
exit(2)

if(os.path.isfile(args.wordlist)):
logging.pprint(f"{args.wordlist} exists and will be overwritten.", 1)
if(bulk and os.path.isfile(wordlist)):
logging.pprint(f"{wordlist} exists and will be overwritten.", 1)
model.Generate(int(args.count), wordlist, int(args.min), int(args.max), int(args.threads))


if(args.bulk):
logging.pprint(f"Bulk mode operation chosen.", 4)

if (args.mode.lower() == "train"):
if (os.path.isdir(args.output) and not os.path.isfile(args.output)) and (os.path.isdir(args.dataset) and not os.path.isfile(args.dataset)):
corpus_list = os.listdir(args.dataset)
for corpus in corpus_list:
model = cli_init(args.input)
logging.pprint(f"Training {args.input} with {corpus}", 2)
output_file_name = corpus
model_extension = ""
if "." in args.input:
model_extension = args.input.split(".")[-1]
cli_train(model, f"{args.dataset}/{corpus}", args.seperator, f"{args.output}/{corpus}.{model_extension}", output_forced=True, bulk=True)
else:
logging.pprint("In bulk training, output and dataset should be a directory.")
exit(1)

elif (args.mode.lower() == "generate"):
if (os.path.isdir(args.wordlist) and not os.path.isfile(args.wordlist)) and (os.path.isdir(args.input) and not os.path.isfile(args.input)):
model_list = os.listdir(args.input)
print(model_list)
for input in model_list:
logging.pprint(f"Generating from {args.input}/{input} to {args.wordlist}/{input}.txt", 2)

model = cli_init(f"{args.input}/{input}")
model_base = input
if "." in args.input:
model_base = input.split(".")[1]
cli_generate(model, f"{args.wordlist}/{model_base}.txt", bulk=True)
else:
logging.pprint("In bulk generation, input and wordlist should be directory.")

model.Generate(int(args.count), args.wordlist, int(args.min), int(args.max), int(args.threads))




model = cli_init()
if (args.mode.lower() == "generate"):
cli_generate(model)
else:
model = cli_init(args.input)
if (args.mode.lower() == "generate"):
cli_generate(model, args.wordlist)


elif (args.mode.lower() == "train"):
cli_train(model, output_forced=True)
elif (args.mode.lower() == "train"):
cli_train(model, args.dataset, args.seperator, args.output, output_forced=True)


elif(args.mode.lower() == "combine"):
cli_train(model)
cli_generate(model)
elif(args.mode.lower() == "combine"):
cli_train(model, args.dataset, args.seperator, args.output)
cli_generate(model, args.wordlist)


else:
logging.pprint("Invalid mode arguement given.")
logging.pprint("Accepted modes: 'Generate', 'Train', 'Combine'")
exit(5)
else:
logging.pprint("Invalid mode arguement given.")
logging.pprint("Accepted modes: 'Generate', 'Train', 'Combine'")
exit(5)
18 changes: 9 additions & 9 deletions MarkovModel/src/edge.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ namespace Markov {
* Adds the offset parameter to the edge EdgeWeight.
* @param offset - Amount to be added to the EdgeWeight
*/
void AdjustEdge(uint64_t offset);
void AdjustEdge(long int offset);

/** @brief Traverse this edge to RightNode.
* @return Right node. If this is a terminator node, return NULL
*/
Node<NodeStorageType>* TraverseNode();
inline Node<NodeStorageType>* TraverseNode();

/** @brief Set LeftNode of this edge.
* @param node - Node to be linked with.
Expand All @@ -48,7 +48,7 @@ namespace Markov {
/** @brief return edge's EdgeWeight.
* @return edge's EdgeWeight.
*/
uint64_t EdgeWeight();
inline uint64_t EdgeWeight();

/** @brief return edge's LeftNode
* @return edge's LeftNode.
Expand All @@ -58,12 +58,12 @@ namespace Markov {
/** @brief return edge's RightNode
* @return edge's RightNode.
*/
Node<NodeStorageType>* RightNode();
inline Node<NodeStorageType>* RightNode();

private:
Node<NodeStorageType>* _left; /** @brief source node*/
Node<NodeStorageType>* _right;/** @brief target node*/
int _weight; /** @brief Edge EdgeWeight*/
long int _weight; /** @brief Edge EdgeWeight*/
};


Expand All @@ -85,13 +85,13 @@ Markov::Edge<NodeStorageType>::Edge(Markov::Node<NodeStorageType>* _left, Markov
}
// Adds offset to this edge's weight and passes the same offset to the left node's UpdateTotalVerticeWeight
template <typename NodeStorageType>
void Markov::Edge<NodeStorageType>::AdjustEdge(uint64_t offset) {
void Markov::Edge<NodeStorageType>::AdjustEdge(long int offset) {
this->_weight += offset;
this->LeftNode()->UpdateTotalVerticeWeight(offset);
}
// Traverses this edge: returns the right node, or NULL when the right node's value is 0xff (terminator)
template <typename NodeStorageType>
Markov::Node<NodeStorageType>* Markov::Edge<NodeStorageType>::TraverseNode() {
inline Markov::Node<NodeStorageType>* Markov::Edge<NodeStorageType>::TraverseNode() {
if (this->RightNode()->NodeValue() == 0xff) //terminator node
return NULL;
return _right;
Expand All @@ -108,7 +108,7 @@ void Markov::Edge<NodeStorageType>::SetRightEdge(Markov::Node<NodeStorageType>*
}
// Returns the weight of this edge
template <typename NodeStorageType>
uint64_t Markov::Edge<NodeStorageType>::EdgeWeight() {
inline uint64_t Markov::Edge<NodeStorageType>::EdgeWeight() {
return this->_weight;
}
// Returns the left node of this edge
Expand All @@ -118,7 +118,7 @@ Markov::Node<NodeStorageType>* Markov::Edge<NodeStorageType>::LeftNode() {
}
// Returns the right node of this edge
template <typename NodeStorageType>
Markov::Node<NodeStorageType>* Markov::Edge<NodeStorageType>::RightNode() {
inline Markov::Node<NodeStorageType>* Markov::Edge<NodeStorageType>::RightNode() {
return this->_right;
}

Expand Down
52 changes: 26 additions & 26 deletions MarkovModel/src/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <fstream>
#include <assert.h>
#include <string>
#include <algorithm>
#include "node.h"
#include "edge.h"

Expand Down Expand Up @@ -38,7 +39,7 @@ namespace Markov {
* Start from the starter node, invoke RandomNext on current node until terminator node is reached.
* @return Null terminated string that was generated.
*/
NodeStorageType* RandomWalk(int minSetting, int maxSetting);
NodeStorageType* RandomWalk(Markov::Random::RandomEngine* randomEngine, int minSetting, int maxSetting, NodeStorageType* buffer);

/** @brief Adjust the model with a single string.
* Start from the starter node, and for each character, AdjustEdge the edge EdgeWeight from current node to the next, until NULL character is reached.
Expand Down Expand Up @@ -113,15 +114,15 @@ bool Markov::Model<NodeStorageType>::Import(std::ifstream* f) {

char src;
char target;
int oc;
long int oc;

while (std::getline(*f, cell)) {
//std::cout << "cell: " << cell << std::endl;
src = cell[0];
target = cell[cell.length() - 1];
oc = std::atoi(cell.substr(2, cell.length() - 2).c_str());


char* j;
oc = std::strtol(cell.substr(2, cell.length() - 2).c_str(),&j,10);
//std::cout << oc << "\n";
Markov::Node<NodeStorageType>* srcN;
Markov::Node<NodeStorageType>* targetN;
Markov::Edge<NodeStorageType>* e;
Expand Down Expand Up @@ -151,6 +152,15 @@ bool Markov::Model<NodeStorageType>::Import(std::ifstream* f) {

}

for (std::pair<unsigned char, Markov::Node<NodeStorageType>*> const& x : this->nodes) {
//std::cout << "Total edges in EdgesV: " << x.second->edgesV.size() << "\n";
std::sort (x.second->edgesV.begin(), x.second->edgesV.end(), [](Edge<NodeStorageType> *lhs, Edge<NodeStorageType> *rhs)->bool{
return lhs->EdgeWeight() > rhs->EdgeWeight();
});
//for(int i=0;i<x.second->edgesV.size();i++)
// std::cout << x.second->edgesV[i]->EdgeWeight() << ", ";
//std::cout << "\n";
}
//std::cout << "Total number of nodes: " << this->nodes.size() << std::endl;
//std::cout << "Total number of edges: " << this->edges.size() << std::endl;

Expand Down Expand Up @@ -185,43 +195,33 @@ bool Markov::Model<NodeStorageType>::Export(const char* filename) {
}

template <typename NodeStorageType>
NodeStorageType* Markov::Model<NodeStorageType>::RandomWalk(int minSetting, int maxSetting) {
NodeStorageType* Markov::Model<NodeStorageType>::RandomWalk(Markov::Random::RandomEngine* randomEngine, int minSetting, int maxSetting, NodeStorageType* buffer) {
Markov::Node<NodeStorageType>* n = this->starterNode;
int len = 0;
NodeStorageType* ret = new NodeStorageType[64];
Markov::Node<NodeStorageType>* temp_node;
while (n != NULL) {
//n = n->RandomNext();
temp_node = n->RandomNext();
//dirty cutoff, needs better solution
if (len == 60)
break;
if (len > maxSetting) {
//std::cout<<"MAX ->"<< "node*: " << temp_node << ", len: " << len << "\n";
while (true) {
temp_node = n->RandomNext(randomEngine);
if (len >= maxSetting) {
break;
}

if ((temp_node == NULL) && (len < minSetting)) {
//std::cout << "node*: " << temp_node << ", len: " << len << "\n";
else if ((temp_node == NULL) && (len < minSetting)) {
continue;
}

if (temp_node == NULL)
else if (temp_node == NULL){
break;
}

n = temp_node;

//std::cout << n->NodeValue();
ret[len++] = n->NodeValue();

//maximum character length exceeded and stack will overflow.
//assert(len<32 && "return buffer overflowing, this will segfault if not aborted.");
buffer[len++] = n->NodeValue();
}

//null terminate the string
ret[len] = 0x00;
buffer[len] = 0x00;

//do something with the generated string
return ret; //for now
return buffer; //for now
}

template <typename NodeStorageType>
Expand Down
Loading

0 comments on commit ad6f41b

Please sign in to comment.