mlprimitives.custom.text module

class mlprimitives.custom.text.TextCleaner(column=None, language='multi', lower=True, accents=True, stopwords=True, non_alpha=True, single_chars=True)

Bases: object

RE_ACCENTS = {'a': re.compile('[àâáäåã]'), 'e': re.compile('[èêéë]'), 'i': re.compile('[ìîíï]'), 'o': re.compile('[òôóö]'), 'u': re.compile('[ùûúü]')}
RE_NON_ALNUM = re.compile('[^\\w\\d]')
RE_NON_ALPHA = re.compile('[^a-z]+')
RE_SYMBOLS = re.compile('[-]')
STOPWORDS = {}
static detect_language(texts)
fit(X)
classmethod get_stopwords(language_code)
produce(X)