Skip to content

Commit

Permalink
Give a/e/o priority over i/u when placing the tone mark; fixes #8
Browse files Browse the repository at this point in the history
  • Loading branch information
lxyu committed May 10, 2016
1 parent 0f827fe commit 4dbd542
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 6 deletions.
13 changes: 8 additions & 5 deletions pinyin/pinyin.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# -*- coding: utf-8 -*-

import os
import itertools
import unicodedata

from ._compat import u

__all__ = ['get', 'get_pinyin', 'get_initial']

tonemarks = ["", u("̄"), u("́"), u("̌"), u("̀"), ""]


# init pinyin dict
pinyin_dict = {}
Expand Down Expand Up @@ -35,9 +35,10 @@ def _pinyin_generator(chars, format):
elif format == "numerical":
pinyin += str(tone)
elif format == "diacritical":
# Find first vowel -- we should put the diacritical mark
# just after
vowel = pinyin.index(next(x for x in pinyin if x in "aeiou")) + 1
# Pick the vowel that takes the diacritical mark: the first a/e/o if the
# syllable has one, otherwise the first i/u; the mark goes right after it
vowels = itertools.chain((c for c in pinyin if c in "aeo"),
(c for c in pinyin if c in "iu"))
vowel = pinyin.index(next(vowels)) + 1
pinyin = pinyin[:vowel] + tonemarks[tone] + pinyin[vowel:]
else:
error = "Format must be one of: numerical/diacritical/strip"
Expand Down Expand Up @@ -65,3 +66,5 @@ def get_initial(s, delimiter=' '):
"""
initials = (p[0] for p in _pinyin_generator(u(s), format="strip"))
return delimiter.join(initials)

tonemarks = ["", u("̄"), u("́"), u("̌"), u("̀"), ""]
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='pinyin',
version='0.3',
version='0.3.1',
description='Translate chinese chars to pinyin based on Mandarin.dat',
author='Lx Yu',
author_email='[email protected]',
Expand Down
6 changes: 6 additions & 0 deletions test_pinyin.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ def test_get_initial(self):
# Mixed input: the expected value shows the non-Chinese prefix 'hi' passed
# through unchanged, followed directly (no separator) by the tone-marked
# pinyin of the two Chinese characters.
def test_mixed_chinese_english_input(self):
self.assertEqual(pinyin.get('hi你好'), u('hinǐhǎo'))

# Checks tone-mark placement in the output of pinyin.get() called without a
# format argument: the mark must sit on a/e/o when the syllable has one,
# even if an i or u appears earlier or later in the same syllable.
def test_correct_diacritical(self):
# "cao": both a and o are present; the first of them (a) takes the mark
self.assertEqual(pinyin.get("操"), u("cāo"))
# "xiao": i comes first, but the mark belongs on a
self.assertEqual(pinyin.get("小"), u("xiǎo"))
# "jue": u comes first, but the mark belongs on e
self.assertEqual(pinyin.get("绝"), u("jué"))
# "bei": the mark belongs on e, not on the trailing i
self.assertEqual(pinyin.get("被"), u("bèi"))


if __name__ == '__main__':
unittest.main()

0 comments on commit 4dbd542

Please sign in to comment.