# File-viewer metadata for the original listing: Python, 21 lines, 655 B.
import flair.datasets

# Load the Universal Dependencies Persian corpus and downsample it to 10%
# of its size (presumably to keep experimentation fast -- confirm).
# To work with the full corpus instead, drop the `.downsample(0.1)` call.
corpus = flair.datasets.UD_PERSIAN().downsample(0.1)

print("--- 2 Downsampled ---")
# Uncomment to inspect the downsampled corpus:
# print(corpus)

# Create a label dictionary for a Universal Part-of-Speech tagging task.
upos_dictionary = corpus.make_label_dictionary(label_type='upos')
# Uncomment to inspect the dictionary:
# print(upos_dictionary)

# Create a label dictionary for a regular POS tagging task.
pos_dictionary = corpus.make_label_dictionary(label_type='pos')

# Create a label dictionary for a morphological number tagging task.
number_dictionary = corpus.make_label_dictionary(label_type='number')

# Backward-compatible alias: this dictionary was previously bound to the
# name `tense_dictionary` even though it is built from the 'number' labels.
tense_dictionary = number_dictionary

print()