diff --git a/cantofilter/cli.py b/cantofilter/cli.py index b1a7c8d..d093bb3 100644 --- a/cantofilter/cli.py +++ b/cantofilter/cli.py @@ -2,6 +2,8 @@ import sys from .judge import judge +sys.stdout.reconfigure(encoding='utf-8') + def main(): ''' When used as a command line tool, specify input text file with `--input `, and output type with `--type `. diff --git a/cantofilter/judge.py b/cantofilter/judge.py index f20ab4a..97c2455 100644 --- a/cantofilter/judge.py +++ b/cantofilter/judge.py @@ -2,24 +2,24 @@ from typing import List, Tuple canto_unique = re.compile( - r'[嘅嗰啲咗佢喺咁噉冇啩哋畀嚟諗惗乜嘢閪撚𨳍𨳊瞓睇㗎餸𨋢摷喎嚿噃嚡嘥嗮啱揾搵喐逳噏𢳂岋糴揈捹撳㩒𥄫攰癐冚孻冧𡃁嚫跣𨃩瀡氹嬲掟孭]|' + + r'[嘅嗰啲咗佢喺咁噉冇啩哋畀嚟諗惗乜嘢閪撚𨳍𨳊瞓睇㗎餸𨋢摷喎嚿噃嚡嘥嗮啱揾搵喐逳噏𢳂岋糴揈捹撳㩒𥄫攰癐冚孻冧𡃁嚫跣𨃩瀡氹嬲掟孭黐唞㪗埞忟𢛴]|' + r'唔[係得會好識使洗駛通知到去走掂該]|點[樣會做得解]|[琴尋噚聽第]日|[而依]家|家[下陣]|[真就]係|邊[度個位科]|' + r'[嚇凍攝整揩逢淥浸激][親嚫]|[橫搞傾諗得唔]掂|仲[有係話要得好衰唔]|返[學工去歸]|' + - r'屋企|收皮|傾[偈計]|幫襯|執[好生實返輸]|求其|是[但旦]|[濕溼]碎|零舍|肉[赤緊]') -mando_unique = re.compile(r'[這哪您們唄咱啥甭]|還[是好有]') -mando_feature = re.compile(r'[那是的他她吧沒在麼么些了卻説說吃弄]|而已') + r'屋企|收皮|慳錢|傾[偈計]|幫襯|執[好生實返輸]|求其|是[但旦]|[濕溼]碎|零舍|肉[赤緊]') +mando_unique = re.compile(r'[這哪您們唄咱啥甭她]|還[是好有]') +# “在不” 因為太多融入粵語所以唔喺判別標準內 +mando_feature = re.compile(r'[那是的他吧沒麼么些了卻説說吃弄]|而已') mando_loan = re.compile(r'亞利桑那|剎那|巴塞羅那|薩那|沙那|哈瓦那|印第安那|那不勒斯|支那|' + - r'是[否日次非但旦]|利是|唯命是從|頭頭是道|似是而非|自以為是|俯拾皆是|撩是鬥非|莫衷一是|' + - r'[目綠藍紅]的|的[士確式]|波羅的海|眾矢之的|的而且確|' + + r'是[否日次非但旦]|[利於]是|唯命是從|頭頭是道|似是而非|自以為是|俯拾皆是|撩是鬥非|莫衷一是|唯才是用|' + + r'[目綠藍紅中]的|的[士確式]|波羅的海|眾矢之的|的而且確|大眼的度|' + r'些[微少許小]|' + - r'[淹沉浸覆湮埋沒出]沒|沒[落收]|神出鬼沒|' + - r'了[結無斷當然哥結得解]|[未明]了|不了了之|不得了|大不了|' + - r'他[信人國日殺鄉]|[其利無排維]他|馬耳他|他加祿|他山之石|' + - r'在[場世讀於位編此]|[實存旨志好所自潛]在|無處不在|大有人在|' + - r'[酒網水貼]吧|吧台|' + + r'[淹沉浸覆湮埋沒出]沒|沒[落頂收]|神出鬼沒|' + + r'了[結無斷當然哥結得解事之]|[未明]了|不得了|大不了|' + + r'他[信人國日殺鄉]|[其利無排維結]他|馬耳他|他加祿|他山之石|' + + r'[酒網水貼]吧|吧[台臺枱檯]|' + r'[退忘阻]卻|卻步|' + - r'[遊游小傳解學假淺眾衆][説說]|[說說][話服明]|自圓其[説說]|長話短[說説]|不由分[說説]|' + - r'吃[虧苦]|' + + r'[遊游小傳解學假淺眾衆訴論][説說]|[說説][話服明]|自圓其[説說]|長話短[說説]|不由分[說説]|' + + r'吃[虧苦力]|' + r'弄[堂]') diff --git a/cantofilter/version.py b/cantofilter/version.py index 5c4105c..7863915 100644 --- a/cantofilter/version.py +++ b/cantofilter/version.py @@ -1 +1 @@ -__version__ = "1.0.1" +__version__ = "1.0.2" diff --git a/tests/test_judge.py b/tests/test_judge.py index 3cf3a02..c2080b9 100644 --- a/tests/test_judge.py +++ b/tests/test_judge.py @@ -1,10 +1,10 @@ import unittest from cantofilter.judge import judge -cantonese = ["你喺邊度"] -mandarin = ["你在哪裏"] -mixed = ["是咁的"] -neutral = ["去學校讀書"] +cantonese = ["你喺邊度","乜你今日唔使返學咩","今日好可能會嚟唔到", "我哋影張相留念"] +mandarin = ["你在哪裏","你想插班的話"] +mixed = ["是咁的","屋企停電的話"] +neutral = ["去學校讀書","做人最重要開心"] class TestJudgeFunction(unittest.TestCase):