Skip to content

Commit

Permalink
added check for ticker must contain a vowel
Browse files Browse the repository at this point in the history
  • Loading branch information
danielkberry committed Jul 23, 2017
1 parent 5719a96 commit cd55de0
Showing 1 changed file with 109 additions and 91 deletions.
200 changes: 109 additions & 91 deletions pronounceable.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,153 +8,171 @@ var threshold = 0.001;

// Load the n-gram probability tables from the JSON files in ./data.
// The files are read synchronously, once, when this module is first loaded.
//   tuples[a][b]     -> value for the letter pair "ab"
//   triples[a][b][c] -> value for the letter triple "abc"
// NOTE(review): the values are presumably percentages in the shape produced
// by train() below -- confirm against the shipped data files.
var tuples = JSON.parse(
  fs.readFileSync(path.resolve(__dirname, "data/tuples.json"), "utf8")
);
var triples = JSON.parse(
  fs.readFileSync(path.resolve(__dirname, "data/triples.json"), "utf8")
);

// Normalise a word for n-gram lookup: strip every character that is not an
// ASCII letter (A-Z, a-z), then lower-case what is left.
//
//   word    - string to normalise (a non-string throws a TypeError, because
//             .replace is called on it directly)
//   returns - string containing only the characters a-z (possibly empty)
//
// The strip happens before the lower-casing so that non-ASCII characters are
// removed first and can never lower-case into an ASCII letter.
function clean(word) {
  return word.replace(/[^a-zA-Z]/g, "").toLowerCase();
}

// Express `score` as a percentage of `count`.
//
//   score   - number of occurrences of one item
//   count   - total number of occurrences
//   returns - score / count * 100 (NaN or Infinity when count is 0; callers
//             in this file only call it after counting at least one n-gram)
function percent(score, count) {
  return score / count * 100;
}

// Report whether the n-gram of length `depth` that starts at w[i] is missing
// from the nested probability table `probs`.
//
//   w       - cleaned word (indexable by position)
//   i       - index of the first letter of the n-gram
//   depth   - n-gram length: 2 for tuples, 3 for triples
//   probs   - nested lookup table, one nesting level per letter
//   returns - true if any level of the lookup is undefined, false otherwise
function undef(w, i, depth, probs) {
  var node = probs;
  var pos = i;
  var remaining = depth;

  // Descend one table level per letter; stop early on the first gap.
  while (remaining > 1) {
    if (typeof node[w[pos]] === "undefined") return true;
    node = node[w[pos]];
    pos++;
    remaining--;
  }

  // Final level: the entry for the last letter of the n-gram.
  return typeof node[w[pos]] === "undefined";
}

// Extract probabilities of word tuples (adjacent letter pairs).
//
//   words   - array of strings; each is passed through clean() first
//   returns - nested object where probs[first][second] is the percentage of
//             all counted pairs (across the whole list) that were
//             "first" followed by "second"
//
// Words shorter than two letters after cleaning contribute nothing.
function trainTuples(words) {
  var probs = {};
  var count = 0;

  // Pass 1: count every adjacent letter pair.
  words.forEach(function(word) {
    var w = clean(word);

    for (var i = 0; i < w.length - 1; i++) {
      var first = w[i];
      var second = w[i + 1];

      if (!probs[first]) probs[first] = {};
      probs[first][second] = (probs[first][second] || 0) + 1;
      count++;
    }
  });

  // Pass 2: turn the raw counts into percentages of the grand total.
  // When nothing was counted probs is empty, so percent() never divides by 0.
  Object.keys(probs).forEach(function(first) {
    Object.keys(probs[first]).forEach(function(second) {
      probs[first][second] = percent(probs[first][second], count);
    });
  });

  return probs;
}

// Extract probabilities of word triples (three consecutive letters).
//
//   words   - array of strings; each is passed through clean() first
//   returns - nested object where probs[first][second][third] is the
//             percentage of all counted triples (across the whole list)
//             that were exactly those three letters in that order
//
// Words shorter than three letters after cleaning contribute nothing.
function trainTriples(words) {
  var probs = {};
  var count = 0;

  // Pass 1: count every run of three consecutive letters.
  words.forEach(function(word) {
    var w = clean(word);

    for (var i = 0; i < w.length - 2; i++) {
      var first = w[i];
      var second = w[i + 1];
      var third = w[i + 2];

      if (!probs[first]) probs[first] = {};
      if (!probs[first][second]) probs[first][second] = {};
      probs[first][second][third] = (probs[first][second][third] || 0) + 1;
      count++;
    }
  });

  // Pass 2: turn the raw counts into percentages of the grand total.
  // When nothing was counted probs is empty, so percent() never divides by 0.
  Object.keys(probs).forEach(function(first) {
    Object.keys(probs[first]).forEach(function(second) {
      Object.keys(probs[first][second]).forEach(function(third) {
        probs[first][second][third] = percent(
          probs[first][second][third],
          count
        );
      });
    });
  });

  return probs;
}

// Extract probabilities of word tuples and triples
// from a large list of words.
//
//   filename - path of a UTF-8 text file containing whitespace-separated words
//   callback - function(tuples, triples), called once with the two tables
//              built by trainTuples() and trainTriples()
//
// A read error is re-thrown from inside the fs.readFile callback; the
// callback signature has no error argument, so it is not reported that way.
// The module-level tables used by test() and score() are not modified.
module.exports.train = function(filename, callback) {
  fs.readFile(filename, "utf8", function read(err, data) {
    if (err) throw err;

    var words = data.trim().split(/\s+/);
    // Local names differ from the module-level `tuples`/`triples` on purpose,
    // so the loaded tables are not shadowed here.
    var tupleProbs = trainTuples(words);
    var tripleProbs = trainTriples(words);

    callback(tupleProbs, tripleProbs);
  });
};

// Check whether a word is pronounceable using the loaded n-gram tables.
//
//   word    - string to check; it is normalised with clean() first
//   returns - true only if the word contains at least one vowel
//             (a, e, i, o, u or y) AND
//               - for a 2-letter word: its letter pair is in `tuples` with a
//                 value of at least `threshold`;
//               - for a longer word: every letter triple is in `triples`
//                 with a value of at least `threshold`.
//             One-letter words are judged on the vowel rule alone, and an
//             empty word (after cleaning) is never pronounceable.
module.exports.test = function(word) {
  var w = clean(word);

  if (w.length === 2) {
    // Exactly one pair to check.
    if (undef(w, 0, 2, tuples)) return false;
    if (tuples[w[0]][w[1]] < threshold) return false;
  } else if (w.length > 2) {
    for (var i = 0; i < w.length - 2; i++) {
      if (undef(w, i, 3, triples)) return false;
      if (triples[w[i]][w[i + 1]][w[i + 2]] < threshold) return false;
    }
  }

  // Only the existence of a vowel matters, so stop at the first match
  // instead of counting them all.
  return /[aeiouy]/.test(w);
};

// Compute a normalised score for the pronounceability of the word.
//
//   word    - string to score; it is normalised with clean() first
//   returns - 1 for any one-letter word;
//             0 if the cleaned word contains no vowel (a, e, i, o, u or y),
//             which includes the empty word;
//             otherwise the sum of the table values of the word's n-grams
//             (the single pair for a 2-letter word, every triple for longer
//             words; n-grams missing from the table add 0) divided by the
//             word length.
//
// NOTE(review): a one-letter consonant scores 1 here, while test() rejects
// it for having no vowel. Kept as-is; confirm whether that is intended.
module.exports.score = function(word) {
  var w = clean(word);
  var score = 0;

  if (w.length === 1) return 1;

  // Only the existence of a vowel matters, so stop at the first match
  // instead of counting them all. This also returns before the division
  // below can see a zero-length word.
  if (!/[aeiouy]/.test(w)) return 0;

  if (w.length === 2) {
    // Exactly one pair contributes to the score.
    if (!undef(w, 0, 2, tuples)) {
      score = score + tuples[w[0]][w[1]];
    }
  } else {
    for (var i = 0; i < w.length - 2; i++) {
      if (!undef(w, i, 3, triples)) {
        score = score + triples[w[i]][w[i + 1]][w[i + 2]];
      }
    }
  }

  return score / w.length;
};

0 comments on commit cd55de0

Please sign in to comment.