@inproceedings{steuer-etal-2023-information,
title = "Information-Theoretic Characterization of Vowel Harmony: A Cross-Linguistic Study on Word Lists",
author = "Steuer, Julius and
List, Johann-Mattis and
Abdullah, Badr M. and
Klakow, Dietrich",
editor = "Beinborn, Lisa and
Goswami, Koustava and
Murado{\u{g}}lu, Saliha and
Sorokin, Alexey and
Kumar, Ritesh and
Shcherbakov, Andreas and
Ponti, Edoardo M. and
Cotterell, Ryan and
Vylomova, Ekaterina",
booktitle = "Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.sigtyp-1.10",
doi = "10.18653/v1/2023.sigtyp-1.10",
pages = "96--109",
abstract = "We present a cross-linguistic study of vowel harmony that aims to quantifies this phenomenon using data-driven computational modeling. Concretely, we define an information-theoretic measure of harmonicity based on the predictability of vowels in a natural language lexicon, which we estimate using phoneme-level language models (PLMs). Prior quantitative studies have heavily relied on inflected word-forms in the analysis on vowel harmony. On the contrary, we train our models using cross-linguistically comparable lemma forms with little or no inflection, which enables us to cover more under-studied languages. Training data for our PLMs consists of word lists offering a maximum of 1000 entries per language. Despite the fact that the data we employ are substantially smaller than previously used corpora, our experiments demonstrate the neural PLMs capture vowel harmony patterns in a set of languages that exhibit this phenomenon. Our work also demonstrates that word lists are a valuable resource for typological research, and offers new possibilities for future studies on low-resource, under-studied languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="steuer-etal-2023-information">
<titleInfo>
<title>Information-Theoretic Characterization of Vowel Harmony: A Cross-Linguistic Study on Word Lists</title>
</titleInfo>
<name type="personal">
<namePart type="given">Julius</namePart>
<namePart type="family">Steuer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johann-Mattis</namePart>
<namePart type="family">List</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Badr</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Abdullah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dietrich</namePart>
<namePart type="family">Klakow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lisa</namePart>
<namePart type="family">Beinborn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koustava</namePart>
<namePart type="family">Goswami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saliha</namePart>
<namePart type="family">Muradoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexey</namePart>
<namePart type="family">Sorokin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Shcherbakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edoardo</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Ponti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a cross-linguistic study of vowel harmony that aims to quantifies this phenomenon using data-driven computational modeling. Concretely, we define an information-theoretic measure of harmonicity based on the predictability of vowels in a natural language lexicon, which we estimate using phoneme-level language models (PLMs). Prior quantitative studies have heavily relied on inflected word-forms in the analysis on vowel harmony. On the contrary, we train our models using cross-linguistically comparable lemma forms with little or no inflection, which enables us to cover more under-studied languages. Training data for our PLMs consists of word lists offering a maximum of 1000 entries per language. Despite the fact that the data we employ are substantially smaller than previously used corpora, our experiments demonstrate the neural PLMs capture vowel harmony patterns in a set of languages that exhibit this phenomenon. Our work also demonstrates that word lists are a valuable resource for typological research, and offers new possibilities for future studies on low-resource, under-studied languages.</abstract>
<identifier type="citekey">steuer-etal-2023-information</identifier>
<identifier type="doi">10.18653/v1/2023.sigtyp-1.10</identifier>
<location>
<url>https://aclanthology.org/2023.sigtyp-1.10</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>96</start>
<end>109</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Information-Theoretic Characterization of Vowel Harmony: A Cross-Linguistic Study on Word Lists
%A Steuer, Julius
%A List, Johann-Mattis
%A Abdullah, Badr M.
%A Klakow, Dietrich
%Y Beinborn, Lisa
%Y Goswami, Koustava
%Y Muradoğlu, Saliha
%Y Sorokin, Alexey
%Y Kumar, Ritesh
%Y Shcherbakov, Andreas
%Y Ponti, Edoardo M.
%Y Cotterell, Ryan
%Y Vylomova, Ekaterina
%S Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F steuer-etal-2023-information
%X We present a cross-linguistic study of vowel harmony that aims to quantifies this phenomenon using data-driven computational modeling. Concretely, we define an information-theoretic measure of harmonicity based on the predictability of vowels in a natural language lexicon, which we estimate using phoneme-level language models (PLMs). Prior quantitative studies have heavily relied on inflected word-forms in the analysis on vowel harmony. On the contrary, we train our models using cross-linguistically comparable lemma forms with little or no inflection, which enables us to cover more under-studied languages. Training data for our PLMs consists of word lists offering a maximum of 1000 entries per language. Despite the fact that the data we employ are substantially smaller than previously used corpora, our experiments demonstrate the neural PLMs capture vowel harmony patterns in a set of languages that exhibit this phenomenon. Our work also demonstrates that word lists are a valuable resource for typological research, and offers new possibilities for future studies on low-resource, under-studied languages.
%R 10.18653/v1/2023.sigtyp-1.10
%U https://aclanthology.org/2023.sigtyp-1.10
%U https://doi.org/10.18653/v1/2023.sigtyp-1.10
%P 96-109
Markdown (Informal)
[Information-Theoretic Characterization of Vowel Harmony: A Cross-Linguistic Study on Word Lists](https://aclanthology.org/2023.sigtyp-1.10) (Steuer et al., SIGTYP 2023)
ACL