# Source code for sussex_nltk.tokenize

"""
.. codeauthor::
    Matti Lyra
"""

import cmu

def twitter_tokenize_batch(sents):
    """Tokenize a list of sentences with the CMU twitter tokenizer.

    Runs ``cmu.tag`` over *sents* and strips off the POS column,
    keeping only the token field of each tab-separated output line.

    :param sents: sentences to tokenize
    :return: list of token strings, one per line of tagger output
    """
    tagged_output = cmu.tag(sents)
    # Each output line is "token<TAB>pos"; partition keeps only the token.
    return [line.partition('\t')[0] for line in tagged_output.split('\n')]
def twitter_tokenize(sent, root=None):
    """Tokenize a single sentence with the CMU twitter tokenizer.

    Thin convenience wrapper around :func:`twitter_tokenize_batch`.

    :param sent: the sentence to tokenize
    :param root: unused; kept for backward compatibility with callers
    :return: list of token strings
    """
    return twitter_tokenize_batch([sent])