"""
.. codeauthor:: Matti Lyra
"""
import cmu
def twitter_tokenize_batch(sents):
    """Tokenizes a list of sentences using the CMU twitter tokenizer.

    ``sents`` is handed unchanged to ``cmu.tag``. The text it returns is
    split on newlines and, for every resulting line, the part before the
    first tab character is kept as the token.

    :param sents: the sentences to tokenize; passed straight to ``cmu.tag``.
    :returns: a flat list with one entry per line of the tagger output, in
        output order. A line without a tab is kept whole, and an empty
        line contributes an empty string.
    """
    tagged_text = cmu.tag(sents)
    # Only the text before the first tab is the token; the rest of each
    # line is discarded.
    return [row.partition('\t')[0] for row in tagged_text.split('\n')]
def twitter_tokenize(sent, root=None):
    """Tokenizes a single sentence using the CMU twitter tokenizer.

    Wraps ``sent`` in a one-element list and delegates to
    ``twitter_tokenize_batch``.

    :param sent: the sentence to tokenize.
    :param root: not used by this function.
    :returns: the result of ``twitter_tokenize_batch`` for the
        one-sentence batch.
    """
    single_sentence_batch = [sent]
    return twitter_tokenize_batch(single_sentence_batch)