@inproceedings{gulinigeer-etal-2021-morphological,
    title = "Morphological Analysis Corpus Construction of {Uyghur}",
    author = "Gulinigeer, Abudouwaili and
      Kahaerjiang, Abiderexiti and
      Jiamila, Wushouer and
      Yunfei, Shen and
      Turenisha, Maimaitimin and
      Tuergen, Yibulayin",
    editor = "Li, Sheng and
      Sun, Maosong and
      Liu, Yang and
      Wu, Hua and
      Liu, Kang and
      Che, Wanxiang and
      He, Shizhu and
      Rao, Gaoqi",
    booktitle = "Proceedings of the 20th Chinese National Conference on Computational Linguistics",
    month = aug,
    year = "2021",
    address = "Huhhot, China",
    publisher = "Chinese Information Processing Society of China",
    url = "https://aclanthology.org/2021.ccl-1.96",
    pages = "1076--1086",
    abstract = "Morphological analysis is a fundamental task in natural language processing and results can be applied to different downstream tasks such as named entity recognition, syntactic analysis and machine translation. However, there are many problems in morphological analysis such as low accuracy caused by a lack of resources. In this paper, to alleviate the lack of resources in Uyghur morphological analysis research, we construct a Uyghur morphological analysis corpus based on the analysis of grammatical features and the format of the general morphological analysis corpus. We define morphological tags from 14 dimensions and 53 features, manually annotate and correct the dataset. Finally, the corpus provided some information such as word, lemma, part of speech, morphological analysis tags, morphological segmentation and lemmatization. Also, this paper analyzes some basic features of the corpus and we use the models and datasets provided by SIGMORPHON Shared Task organizers to design comparative experiments to verify the corpus{'}s availability. Results of the experiment are 85.56{\%} and 88.29{\%}, respectively. The corpus provides a reference value for morphological analysis and promotes the research of Uyghur natural language processing.",
    language = "English",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gulinigeer-etal-2021-morphological">
<titleInfo>
<title>Morphological Analysis Corpus Construction of Uyghur</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abudouwaili</namePart>
<namePart type="family">Gulinigeer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abiderexiti</namePart>
<namePart type="family">Kahaerjiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wushouer</namePart>
<namePart type="family">Jiamila</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shen</namePart>
<namePart type="family">Yunfei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maimaitimin</namePart>
<namePart type="family">Turenisha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yibulayin</namePart>
<namePart type="family">Tuergen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shizhu</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaoqi</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Huhhot, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Morphological analysis is a fundamental task in natural language processing and results can be applied to different downstream tasks such as named entity recognition, syntactic analysis and machine translation. However, there are many problems in morphological analysis such as low accuracy caused by a lack of resources. In this paper, to alleviate the lack of resources in Uyghur morphological analysis research, we construct a Uyghur morphological analysis corpus based on the analysis of grammatical features and the format of the general morphological analysis corpus. We define morphological tags from 14 dimensions and 53 features, manually annotate and correct the dataset. Finally, the corpus provided some information such as word, lemma, part of speech, morphological analysis tags, morphological segmentation and lemmatization. Also, this paper analyzes some basic features of the corpus and we use the models and datasets provided by SIGMORPHON Shared Task organizers to design comparative experiments to verify the corpus’s availability. Results of the experiment are 85.56% and 88.29%, respectively. The corpus provides a reference value for morphological analysis and promotes the research of Uyghur natural language processing.</abstract>
<identifier type="citekey">gulinigeer-etal-2021-morphological</identifier>
<location>
<url>https://aclanthology.org/2021.ccl-1.96</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>1076</start>
<end>1086</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Morphological Analysis Corpus Construction of Uyghur
%A Gulinigeer, Abudouwaili
%A Kahaerjiang, Abiderexiti
%A Jiamila, Wushouer
%A Yunfei, Shen
%A Turenisha, Maimaitimin
%A Tuergen, Yibulayin
%Y Li, Sheng
%Y Sun, Maosong
%Y Liu, Yang
%Y Wu, Hua
%Y Liu, Kang
%Y Che, Wanxiang
%Y He, Shizhu
%Y Rao, Gaoqi
%S Proceedings of the 20th Chinese National Conference on Computational Linguistics
%D 2021
%8 August
%I Chinese Information Processing Society of China
%C Huhhot, China
%G English
%F gulinigeer-etal-2021-morphological
%X Morphological analysis is a fundamental task in natural language processing and results can be applied to different downstream tasks such as named entity recognition, syntactic analysis and machine translation. However, there are many problems in morphological analysis such as low accuracy caused by a lack of resources. In this paper, to alleviate the lack of resources in Uyghur morphological analysis research, we construct a Uyghur morphological analysis corpus based on the analysis of grammatical features and the format of the general morphological analysis corpus. We define morphological tags from 14 dimensions and 53 features, manually annotate and correct the dataset. Finally, the corpus provided some information such as word, lemma, part of speech, morphological analysis tags, morphological segmentation and lemmatization. Also, this paper analyzes some basic features of the corpus and we use the models and datasets provided by SIGMORPHON Shared Task organizers to design comparative experiments to verify the corpus’s availability. Results of the experiment are 85.56% and 88.29%, respectively. The corpus provides a reference value for morphological analysis and promotes the research of Uyghur natural language processing.
%U https://aclanthology.org/2021.ccl-1.96
%P 1076-1086
Markdown (Informal)
[Morphological Analysis Corpus Construction of Uyghur](https://aclanthology.org/2021.ccl-1.96) (Gulinigeer et al., CCL 2021)
ACL
- Abudouwaili Gulinigeer, Abiderexiti Kahaerjiang, Wushouer Jiamila, Shen Yunfei, Maimaitimin Turenisha, and Yibulayin Tuergen. 2021. Morphological Analysis Corpus Construction of Uyghur. In Proceedings of the 20th Chinese National Conference on Computational Linguistics, pages 1076–1086, Huhhot, China. Chinese Information Processing Society of China.