# Source code for sussex_nltk.spell
"""
.. codeauthor::
Matti Lyra
"""
import os
from exceptions import AttributeError
import sussex_nltk as susx
_dict_types = ['aspell']
_dict_languages = ['en','en_GB','en_US','en_CA']
def _read_word_list(file_handle):
wl = set()
for line in file_handle:
wl.add(line.strip())
return wl
def dictionary(dict_type='aspell', dict_language='en_GB'):
    """Return the set of words for the requested spelling dictionary.

    The result is the union of the shared ``en-common.wl`` word list and
    every file in the ``data/aspell`` directory (under
    ``susx._sussex_root``) whose name starts with *dict_language*.

    :param dict_type: dictionary type; must be one of ``_dict_types``.
    :param dict_language: language code; must be one of ``_dict_languages``.
    :returns: a ``set`` of words.
    :raises AttributeError: if *dict_type* or *dict_language* is not one of
        the recognised values.
    """
    # Validate up front so bad arguments fail before any file is opened.
    if dict_type not in _dict_types:
        raise AttributeError('Unrecognized dictionary type (%s), must be one of %s.'%(dict_type, ' '.join(_dict_types)))
    if dict_language not in _dict_languages:
        raise AttributeError('Unrecognized dictionary language (%s), must be one of %s.'%(dict_language, ' '.join(_dict_languages)))

    aspell_dir = os.path.join(susx._sussex_root, 'data', 'aspell')
    wordlist = set()

    with open(os.path.join(aspell_dir, 'en-common.wl'), 'r') as fh:
        # update() mutates the set in place. set.union() returns a new set,
        # so calling it without keeping the result would drop these words.
        wordlist.update(_read_word_list(fh))

    wl_files = []
    if dict_type == 'aspell':
        wl_files = [
            name for name in os.listdir(aspell_dir)
            if name.startswith(dict_language)
        ]

    for name in wl_files:
        with open(os.path.join(aspell_dir, name), 'r') as fh:
            wordlist.update(_read_word_list(fh))

    return wordlist