@inproceedings{ranganathan-t-v-2022-building,
title = "Building and Analysis of {T}amil Lyric Corpus with Semantic Representation",
author = "Ranganathan, Karthika and
T V, Geetha",
editor = "Ortega, John E. and
Carpuat, Marine and
Chen, William and
Kann, Katharina and
Lignos, Constantine and
Popovic, Maja and
Tafreshi, Shabnam",
booktitle = "Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Workshop 2: Corpus Generation and Corpus Augmentation for Machine Translation)",
month = sep,
year = "2022",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2022.amta-coco4mt.3",
pages = "18--27",
abstract = "In the new era of modern technology, the cloud has become the library for many things including entertainment, i.e, the availability of lyrics. In order to create awareness about the language and to increase the interest in Tamil film lyrics, a computerized electronic format of Tamil lyrics corpus is necessary for mining the lyric documents. In this paper, the Tamil lyric corpus was collected from various books and lyric websites. Here, we also address the challenges faced while building this corpus. A corpus was created with 15286 documents and stored all the lyric information obtained in the XML format. In this paper, we also explained the Universal Networking Language (UNL) semantic representation that helps to represent the document in a language and domain independent ways. We evaluated this corpus by performing simple statistical analysis for characters, words and a few rhetorical effect analysis. We also evaluated our semantic representation with the existing work and the results are very encouraging.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ranganathan-t-v-2022-building">
<titleInfo>
<title>Building and Analysis of Tamil Lyric Corpus with Semantic Representation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Karthika</namePart>
<namePart type="family">Ranganathan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geetha</namePart>
<namePart type="family">T V</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Workshop 2: Corpus Generation and Corpus Augmentation for Machine Translation)</title>
</titleInfo>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Ortega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Constantine</namePart>
<namePart type="family">Lignos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maja</namePart>
<namePart type="family">Popovic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shabnam</namePart>
<namePart type="family">Tafreshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the new era of modern technology, the cloud has become the library for many things including entertainment, i.e, the availability of lyrics. In order to create awareness about the language and to increase the interest in Tamil film lyrics, a computerized electronic format of Tamil lyrics corpus is necessary for mining the lyric documents. In this paper, the Tamil lyric corpus was collected from various books and lyric websites. Here, we also address the challenges faced while building this corpus. A corpus was created with 15286 documents and stored all the lyric information obtained in the XML format. In this paper, we also explained the Universal Networking Language (UNL) semantic representation that helps to represent the document in a language and domain independent ways. We evaluated this corpus by performing simple statistical analysis for characters, words and a few rhetorical effect analysis. We also evaluated our semantic representation with the existing work and the results are very encouraging.</abstract>
<identifier type="citekey">ranganathan-t-v-2022-building</identifier>
<location>
<url>https://aclanthology.org/2022.amta-coco4mt.3</url>
</location>
<part>
<date>2022-09</date>
<extent unit="page">
<start>18</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building and Analysis of Tamil Lyric Corpus with Semantic Representation
%A Ranganathan, Karthika
%A T V, Geetha
%Y Ortega, John E.
%Y Carpuat, Marine
%Y Chen, William
%Y Kann, Katharina
%Y Lignos, Constantine
%Y Popovic, Maja
%Y Tafreshi, Shabnam
%S Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Workshop 2: Corpus Generation and Corpus Augmentation for Machine Translation)
%D 2022
%8 September
%I Association for Machine Translation in the Americas
%F ranganathan-t-v-2022-building
%X In the new era of modern technology, the cloud has become the library for many things including entertainment, i.e, the availability of lyrics. In order to create awareness about the language and to increase the interest in Tamil film lyrics, a computerized electronic format of Tamil lyrics corpus is necessary for mining the lyric documents. In this paper, the Tamil lyric corpus was collected from various books and lyric websites. Here, we also address the challenges faced while building this corpus. A corpus was created with 15286 documents and stored all the lyric information obtained in the XML format. In this paper, we also explained the Universal Networking Language (UNL) semantic representation that helps to represent the document in a language and domain independent ways. We evaluated this corpus by performing simple statistical analysis for characters, words and a few rhetorical effect analysis. We also evaluated our semantic representation with the existing work and the results are very encouraging.
%U https://aclanthology.org/2022.amta-coco4mt.3
%P 18-27
Markdown (Informal)
[Building and Analysis of Tamil Lyric Corpus with Semantic Representation](https://aclanthology.org/2022.amta-coco4mt.3) (Ranganathan & T V, AMTA 2022)
ACL
- Karthika Ranganathan and Geetha T V. 2022. Building and Analysis of Tamil Lyric Corpus with Semantic Representation. In Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Workshop 2: Corpus Generation and Corpus Augmentation for Machine Translation), pages 18–27. Association for Machine Translation in the Americas.