# Source code for sussex_nltk.tokenize

"""
.. codeauthor::
    Matti Lyra
"""

import cmu

def twitter_tokenize_batch(sents):
    """Tokenize a list of sentences with the CMU twitter tokenizer.

    Runs ``cmu.tag`` over *sents* and strips off the POS column,
    keeping only the token field of each tab-separated output line.

    :param sents: sentences to tokenize
    :return: list of token strings, one per line of tagger output
    """
    tagged_output = cmu.tag(sents)
    # Each output line is "token<TAB>pos"; partition keeps only the token.
    return [line.partition('\t')[0] for line in tagged_output.split('\n')]
def twitter_tokenize(sent, root=None):
    """Tokenize a single sentence with the CMU twitter tokenizer.

    Thin convenience wrapper around :func:`twitter_tokenize_batch`.

    :param sent: the sentence to tokenize
    :param root: unused; kept for backward compatibility with callers
    :return: list of token strings
    """
    return twitter_tokenize_batch([sent])