# Source code for arsenal.nlp.tests.bio2spantest

from arsenal.nlp.annotation import bio2span, Span

def _check_bio2span_cases(cases, include_O):
    """Run ``bio2span`` over ``cases`` and fail on the first mismatch.

    Args:
        cases: iterable of ``(tags, want)`` pairs; ``tags`` is a list of BIO
            tag strings and ``want`` is the expected list of ``Span`` objects.
        include_O: forwarded unchanged as ``bio2span``'s ``include_O`` keyword.

    Raises:
        AssertionError: if ``bio2span`` returns something not equal to
            ``want`` for any case. The want/have values are printed first.
    """
    for tags, want in cases:
        print(tags)
        have = bio2span(tags, include_O=include_O)
        if want == have:
            print('\033[32mpassed\033[0m')
            continue
        print('\033[31mfailed:\033[0m')
        print(' want:', want)
        print(' have:', have)
        raise AssertionError(
            'bio2span(%r, include_O=%r): want %r, have %r'
            % (tags, include_O, want, have)
        )


def test_bio2span():
    """Check ``bio2span`` against hand-written BIO tag sequences.

    Two groups of cases are exercised: one with ``include_O=False`` (only
    labelled spans are expected) and one with ``include_O=True`` (every 'O'
    token is also expected as its own one-token span). In the expected
    values, ``ends`` is one past the last token index of the span.

    Raises:
        AssertionError: on the first case whose result differs from the
            expectation. Both groups are enforced; previously a mismatch in
            the ``include_O=False`` group was only printed.
    """
    num_then_temp = [
        Span(label='NUM', begins=0, ends=1),
        Span(label='TEMP', begins=1, ends=2),
    ]

    without_o_cases = [
        # An 'I-' tag with no preceding 'B-' is expected to open a span, and
        # a label change is expected to close the previous one.
        (['I-NUM', 'I-TEMP'], list(num_then_temp)),
        (['I-NUM', 'B-TEMP'], list(num_then_temp)),
        (['B-NUM', 'B-TEMP'], list(num_then_temp)),
        # NOTE(review): exact duplicate of the case above; possibly meant to
        # be a different tag combination -- confirm with the author.
        (['B-NUM', 'B-TEMP'], list(num_then_temp)),
        (['B-NUM', 'O'], [Span(label='NUM', begins=0, ends=1)]),
        (['O', 'B-NUM'], [Span(label='NUM', begins=1, ends=2)]),
        (['O', 'B-NUM', 'O'], [Span(label='NUM', begins=1, ends=2)]),
        (['O', 'B-NUM', 'I-NUM'], [Span(label='NUM', begins=1, ends=3)]),
        (
            ['O', 'O', 'O',
             'I-NUM', 'I-NUM', 'I-NUM',
             'I-TEMP', 'I-TEMP', 'I-TEMP', 'I-TEMP',
             'I-TEMP', 'I-TEMP', 'I-TEMP', 'I-TEMP',
             'O',
             'I-NUM', 'I-NUM', 'I-NUM', 'I-NUM',
             'I-NUM', 'I-NUM', 'I-NUM', 'I-NUM',
             'O', 'O', 'O', 'O', 'O', 'O',
             'O', 'O', 'O', 'O', 'O', 'O'],
            [Span(label='NUM', begins=3, ends=6),
             Span(label='TEMP', begins=6, ends=14),
             Span(label='NUM', begins=15, ends=23)],
        ),
        (
            ['O', 'O', 'O', 'O', 'O', 'O',
             'I-TEMP',
             'I-NUM', 'I-NUM', 'I-NUM',
             'O',
             'I-NUM', 'I-NUM', 'I-NUM', 'I-NUM',
             'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
             'I-TEMP', 'I-TEMP', 'I-TEMP',
             'O', 'O', 'O', 'O', 'O',
             'O', 'O', 'O', 'O', 'O',
             'O', 'O', 'O', 'O', 'O'],
            [Span(label='TEMP', begins=6, ends=7),
             Span(label='NUM', begins=7, ends=10),
             Span(label='NUM', begins=11, ends=15),
             Span(label='TEMP', begins=23, ends=26)],
        ),
        (
            # Back-to-back 'B-NUM' runs are expected to yield separate spans.
            ['O', 'O', 'O',
             'B-NUM', 'I-NUM', 'I-NUM', 'I-NUM',
             'B-NUM', 'I-NUM', 'I-NUM', 'I-NUM',
             'O',
             'B-NUM', 'I-NUM', 'I-NUM',
             'O', 'O', 'O',
             'B-TEMP', 'I-TEMP',
             'O'],
            [Span(label='NUM', begins=3, ends=7),
             Span(label='NUM', begins=7, ends=11),
             Span(label='NUM', begins=12, ends=15),
             Span(label='TEMP', begins=18, ends=20)],
        ),
    ]
    _check_bio2span_cases(without_o_cases, include_O=False)

    print()
    print('*************************************************************************')
    print('** Including O Spans.')

    with_o_cases = [
        (
            ['O', 'B-NUM', 'I-DATE'],
            [Span(label='O', begins=0, ends=1),
             Span(label='NUM', begins=1, ends=2),
             Span(label='DATE', begins=2, ends=3)],
        ),
        (
            ['O', 'B-NUM', 'O', 'I-DATE'],
            [Span(label='O', begins=0, ends=1),
             Span(label='NUM', begins=1, ends=2),
             Span(label='O', begins=2, ends=3),
             Span(label='DATE', begins=3, ends=4)],
        ),
        (
            # Consecutive 'O' tokens are expected as separate one-token spans.
            ['O', 'B-NUM', 'O', 'I-DATE', 'O', 'O'],
            [Span(label='O', begins=0, ends=1),
             Span(label='NUM', begins=1, ends=2),
             Span(label='O', begins=2, ends=3),
             Span(label='DATE', begins=3, ends=4),
             Span(label='O', begins=4, ends=5),
             Span(label='O', begins=5, ends=6)],
        ),
        (['O'], [Span(label='O', begins=0, ends=1)]),
    ]
    _check_bio2span_cases(with_o_cases, include_O=True)

    print()
    print('passed test_bio2span')
# Run the test directly when this module is executed as a script.
if __name__ == '__main__':
    test_bio2span()