Source code for arsenal.alphabet
class Alphabet(object):
    """
    Class for maintaining a perfect hash for a set of keys.

    Each distinct key is assigned the next unused integer (starting at 0)
    the first time it is seen, so codes are dense and follow insertion
    order.  Once frozen, looking up an unseen key raises ``ValueError``
    instead of assigning a new code.

    >>> a = Alphabet()
    >>> [a[x] for x in 'abcd']
    [0, 1, 2, 3]
    >>> list(map(a.lookup, range(4)))
    ['a', 'b', 'c', 'd']
    >>> a.freeze()
    >>> a.add('z')
    Traceback (most recent call last):
        ...
    ValueError: Alphabet is frozen. Key "z" not found.
    >>> print(a.plaintext())
    a
    b
    c
    d
    >>> print(a)
    Alphabet(size=4,frozen=True)
    >>> list(a)
    ['a', 'b', 'c', 'd']
    >>> a == Alphabet(['a', 'b', 'c', 'd'])
    True
    >>> a == Alphabet(['b', 'a', 'c', 'd'])
    False
    >>> a.map('aabc')
    [0, 0, 1, 2]

    """

    def __init__(self, data=()):
        """Create an unfrozen alphabet and add every key in `data`, in order."""
        self._map = {}     # key -> int code
        self._list = []    # int code -> key
        self._frozen = False
        self.add_many(data)

    def __repr__(self):
        return 'Alphabet(size=%s,frozen=%s)' % (len(self), self._frozen)

    def freeze(self):
        """Stop assigning codes; unseen keys raise ``ValueError`` from now on."""
        self._frozen = True

    def items(self):
        """Return the ``(key, code)`` pairs of the underlying dict."""
        return self._map.items()

    def imap(self, seq):
        """
        Lazily yield the integer code of each item in `seq`.

        Exactly one code is produced per input item.  Unseen keys are
        assigned a new code as the generator reaches them, unless the
        alphabet is frozen, in which case ``ValueError`` is raised at
        that point.
        """
        for s in seq:
            yield self[s]

    def map(self, seq):
        """Return the codes for every item in `seq` as a list (see `imap`)."""
        return list(self.imap(seq))

    def add_many(self, x):
        """Add every key of the iterable `x`, in iteration order."""
        for k in x:
            self.add(k)

    def lookup(self, i):
        """Return the key whose code is `i` (plain list indexing)."""
        return self._list[i]

    def lookup_many(self, x):
        """Return the list of keys for the codes in the iterable `x`."""
        return [self.lookup(i) for i in x]

    def __contains__(self, k):
        return k in self._map

    def __getitem__(self, k):
        """
        Return the code for key `k`, assigning the next free code if `k`
        is new.

        Raises ``ValueError`` if `k` is unseen and the alphabet is frozen.
        """
        try:
            return self._map[k]
        except KeyError:
            # Handle the miss outside the handler so the ValueError below
            # is not reported as chained to this internal KeyError.
            pass
        if self._frozen:
            raise ValueError('Alphabet is frozen. Key "%s" not found.' % (k,))
        x = self._map[k] = len(self._list)
        self._list.append(k)
        return x

    # Calling the alphabet or using `add` is the same operation as indexing.
    __call__ = __getitem__
    add = __getitem__

    def __iter__(self):
        """Iterate over keys in code order."""
        return iter(self._list)

    def __len__(self):
        return len(self._map)

    def plaintext(self):
        """Return the keys joined by newlines, in code order.

        Assumes keys are strings.
        """
        return '\n'.join(self)

    @classmethod
    def load(cls, filename):
        """
        Build an alphabet from a text file, one key per line.

        Each line is stripped of surrounding whitespace before being
        added; keys receive codes in file order.
        """
        with open(filename) as f:
            # The constructor consumes the generator eagerly, so the file
            # is still open while its lines are being read.
            return cls(line.strip() for line in f)

    def save(self, filename):
        """Write `plaintext()` to `filename`, overwriting any existing file."""
        with open(filename, 'w') as f:
            f.write(self.plaintext())

    def __eq__(self, other):
        """Two alphabets are equal when they hold the same keys in the same
        order; the frozen flag is not compared."""
        if not isinstance(other, Alphabet):
            # Let Python fall back to its default comparison instead of
            # failing with AttributeError on `other._list`.
            return NotImplemented
        return self._list == other._list

    # Encoder/decoder-style aliases for the methods above.
    encode_many = map
    decode_many = lookup_many
    encode = __getitem__
    decode = lookup
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    import doctest
    doctest.testmod()