@inproceedings{etezadi-etal-2022-dadmatools,
title = "{D}adma{T}ools: Natural Language Processing Toolkit for {P}ersian Language",
author = "Etezadi, Romina and
Karrabi, Mohammad and
Zare, Najmeh and
Sajadi, Mohamad Bagher and
Pilehvar, Mohammad Taher",
editor = "Hajishirzi, Hannaneh and
Ning, Qiang and
Sil, Avi",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations",
month = jul,
year = "2022",
address = "Hybrid: Seattle, Washington + Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-demo.13",
doi = "10.18653/v1/2022.naacl-demo.13",
pages = "124--130",
abstract = "We introduce DadmaTools, an open-source Python Natural Language Processing toolkit for the Persian language. The toolkit is a neural pipeline based on spaCy for several text processing tasks, including normalization, tokenization, lemmatization, part-of-speech, dependency parsing, constituency parsing, chunking, and ezafe detecting. DadmaTools relies on fine-tuning of ParsBERT using the PerDT dataset for most of the tasks. Dataset module and embedding module are included in DadmaTools that support different Persian datasets, embeddings, and commonly used functions for them. Our evaluations show that DadmaTools can attain state-of-the-art performance on multiple NLP tasks. The source code is freely available at \url{https://github.com/Dadmatech/DadmaTools}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="etezadi-etal-2022-dadmatools">
<titleInfo>
<title>DadmaTools: Natural Language Processing Toolkit for Persian Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Romina</namePart>
<namePart type="family">Etezadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Karrabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Najmeh</namePart>
<namePart type="family">Zare</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamad</namePart>
<namePart type="given">Bagher</namePart>
<namePart type="family">Sajadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hannaneh</namePart>
<namePart type="family">Hajishirzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiang</namePart>
<namePart type="family">Ning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Avi</namePart>
<namePart type="family">Sil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hybrid: Seattle, Washington + Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce DadmaTools, an open-source Python Natural Language Processing toolkit for the Persian language. The toolkit is a neural pipeline based on spaCy for several text processing tasks, including normalization, tokenization, lemmatization, part-of-speech, dependency parsing, constituency parsing, chunking, and ezafe detecting. DadmaTools relies on fine-tuning of ParsBERT using the PerDT dataset for most of the tasks. Dataset module and embedding module are included in DadmaTools that support different Persian datasets, embeddings, and commonly used functions for them. Our evaluations show that DadmaTools can attain state-of-the-art performance on multiple NLP tasks. The source code is freely available at https://github.com/Dadmatech/DadmaTools.</abstract>
<identifier type="citekey">etezadi-etal-2022-dadmatools</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-demo.13</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-demo.13</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>124</start>
<end>130</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DadmaTools: Natural Language Processing Toolkit for Persian Language
%A Etezadi, Romina
%A Karrabi, Mohammad
%A Zare, Najmeh
%A Sajadi, Mohamad Bagher
%A Pilehvar, Mohammad Taher
%Y Hajishirzi, Hannaneh
%Y Ning, Qiang
%Y Sil, Avi
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations
%D 2022
%8 July
%I Association for Computational Linguistics
%C Hybrid: Seattle, Washington + Online
%F etezadi-etal-2022-dadmatools
%X We introduce DadmaTools, an open-source Python Natural Language Processing toolkit for the Persian language. The toolkit is a neural pipeline based on spaCy for several text processing tasks, including normalization, tokenization, lemmatization, part-of-speech, dependency parsing, constituency parsing, chunking, and ezafe detecting. DadmaTools relies on fine-tuning of ParsBERT using the PerDT dataset for most of the tasks. Dataset module and embedding module are included in DadmaTools that support different Persian datasets, embeddings, and commonly used functions for them. Our evaluations show that DadmaTools can attain state-of-the-art performance on multiple NLP tasks. The source code is freely available at https://github.com/Dadmatech/DadmaTools.
%R 10.18653/v1/2022.naacl-demo.13
%U https://aclanthology.org/2022.naacl-demo.13
%U https://doi.org/10.18653/v1/2022.naacl-demo.13
%P 124-130
Markdown (Informal)
[DadmaTools: Natural Language Processing Toolkit for Persian Language](https://aclanthology.org/2022.naacl-demo.13) (Etezadi et al., NAACL 2022)
ACL
- Romina Etezadi, Mohammad Karrabi, Najmeh Zare, Mohamad Bagher Sajadi, and Mohammad Taher Pilehvar. 2022. DadmaTools: Natural Language Processing Toolkit for Persian Language. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations, pages 124–130, Hybrid: Seattle, Washington + Online. Association for Computational Linguistics.