# Source code for mlprimitives.custom.counters

# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd


class Counter():
    """Base class for primitives that compute one count per input column.

    Subclasses implement ``_count``. ``count`` applies it to every column of
    the input and stores the results in ``self.counts``; ``get_counts``
    returns them.

    Args:
        scalar (bool):
            If True, the input must hold a single column and ``get_counts``
            returns a single value instead of an array. Defaults to True.
        add:
            Value added to every computed count. Defaults to 0.
    """

    def __init__(self, scalar=True, add=0):
        self.scalar = scalar
        self.add = add

    def _count(self, column):
        """Return the count for a single column. Subclasses must override."""
        raise NotImplementedError

    def count(self, X):
        """Compute and store the count of every column of ``X``.

        Args:
            X:
                1d or 2d array-like exposing a ``shape`` attribute. Anything
                that is not already a ``pandas.DataFrame`` is wrapped in one.

        Raises:
            ValueError:
                If ``X`` has more than 2 dimensions, or if ``scalar`` is True
                and ``X`` is 2d with a number of columns other than one.
        """
        n_dims = len(X.shape)
        if n_dims > 2:
            raise ValueError('Only 1d or 2d arrays are supported')

        # Scalar mode only ever reports the first count, so any 2d input that
        # is not exactly one column wide must be rejected (not just width 2).
        if self.scalar and n_dims == 2 and X.shape[1] != 1:
            raise ValueError('If scalar is True, only single column arrays are supported')

        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X)

        self.counts = [self._count(X[column]) + self.add for column in X]

    def get_counts(self):
        """Return the counts stored by the last call to ``count``.

        Returns:
            The first stored count if ``scalar`` is True, otherwise a
            ``numpy.ndarray`` with one count per column.
        """
        if self.scalar:
            return self.counts[0]

        return np.array(self.counts)
class UniqueCounter(Counter):
    """Counter that reports how many distinct values each column holds."""

    def _count(self, column):
        """Return the number of distinct values found in ``column``."""
        distinct_values = np.unique(column)
        return distinct_values.size
class VocabularyCounter(Counter):
    """Counter over the whitespace-separated words of a column of texts.

    Args:
        total (bool):
            If True, count the distinct words seen across all the texts of
            the column. If False, count the words of the longest text.
            Defaults to True.
        *args, **kwargs:
            Forwarded unchanged to ``Counter.__init__``.
    """

    def __init__(self, total=True, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.total = total

    def _count(self, column):
        """Return the vocabulary size or the maximum text length in words.

        Each element of ``column`` is split with ``str.split()``. An empty
        column yields 0 in both modes.
        """
        if self.total:
            seen_words = set()
            for text in column:
                seen_words.update(text.split())

            return len(seen_words)

        return max((len(text.split()) for text in column), default=0)
def count_features(X):
    """Return the number of columns of a 2d array.

    Args:
        X: Object exposing a ``shape`` attribute.

    Returns:
        The second entry of ``X.shape``.

    Raises:
        ValueError: If ``X.shape`` does not have exactly two entries.
    """
    shape = X.shape
    if len(shape) == 2:
        return shape[1]

    raise ValueError('Only 2d arrays are supported')