% GECSum paper (Xie, Zhang, Zhang), published in the LREC-COLING 2024 proceedings.
% Brace protection covers whole words/acronyms only, so sentence-casing styles keep them intact.
@inproceedings{xie-etal-2024-gecsum-generative,
  title     = {{GECSum}: Generative Evaluation-Driven Sequence Level Contrastive Learning for Abstractive Summarization},
  author    = {Xie, Jiawen and
               Zhang, Shaoting and
               Zhang, Xiaofan},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.670},
  pages     = {7581--7595},
  abstract  = {While dominant in abstractive summarization, transformer-based language models with the standard maximum likelihood estimation (MLE) training remain challenged by two discrepancies: the misalignment between token-level training and sequence-level evaluation, and the divergence between teacher-forcing training manner and auto-regressive generation behavior. Recent studies have shown that sequence-level contrastive learning, which utilizes the quality differences between multiple summaries as prior information, can effectively mitigate these issues. However, as certain evaluation metrics often determine the contrastive signals in existing methods, this leads to the model performance aligning with the preferences of these metrics being limited by the evaluation capabilities of these metrics. Inspired by prior works that treat the evaluation of generated text as a text generation problem, we propose a generative evaluation-driven contrastive learning framework, which leverages the semantic understanding capabilities of the abstractive model itself to evaluate summary in reference-based settings. In this way, our method establishes a connection between the model{'}s reference-based evaluation and reference-free generation scenarios, allowing them to share the benefits of model capability enhancements. Extensive experiments on four summarization datasets demonstrate that our method outperforms the previous state-of-the-art regarding comprehensive performance. Various empirical analyses further substantiate the effectiveness of our method.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="xie-etal-2024-gecsum-generative">
    <titleInfo>
      <title>GECSum: Generative Evaluation-Driven Sequence Level Contrastive Learning for Abstractive Summarization</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Jiawen</namePart>
      <namePart type="family">Xie</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shaoting</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaofan</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>While dominant in abstractive summarization, transformer-based language models with the standard maximum likelihood estimation (MLE) training remain challenged by two discrepancies: the misalignment between token-level training and sequence-level evaluation, and the divergence between teacher-forcing training manner and auto-regressive generation behavior. Recent studies have shown that sequence-level contrastive learning, which utilizes the quality differences between multiple summaries as prior information, can effectively mitigate these issues. However, as certain evaluation metrics often determine the contrastive signals in existing methods, this leads to the model performance aligning with the preferences of these metrics being limited by the evaluation capabilities of these metrics. Inspired by prior works that treat the evaluation of generated text as a text generation problem, we propose a generative evaluation-driven contrastive learning framework, which leverages the semantic understanding capabilities of the abstractive model itself to evaluate summary in reference-based settings. In this way, our method establishes a connection between the model’s reference-based evaluation and reference-free generation scenarios, allowing them to share the benefits of model capability enhancements. Extensive experiments on four summarization datasets demonstrate that our method outperforms the previous state-of-the-art regarding comprehensive performance. Various empirical analyses further substantiate the effectiveness of our method.</abstract>
    <identifier type="citekey">xie-etal-2024-gecsum-generative</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.670</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>7581</start>
        <end>7595</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T GECSum: Generative Evaluation-Driven Sequence Level Contrastive Learning for Abstractive Summarization
%A Xie, Jiawen
%A Zhang, Shaoting
%A Zhang, Xiaofan
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F xie-etal-2024-gecsum-generative
%X While dominant in abstractive summarization, transformer-based language models with the standard maximum likelihood estimation (MLE) training remain challenged by two discrepancies: the misalignment between token-level training and sequence-level evaluation, and the divergence between teacher-forcing training manner and auto-regressive generation behavior. Recent studies have shown that sequence-level contrastive learning, which utilizes the quality differences between multiple summaries as prior information, can effectively mitigate these issues. However, as certain evaluation metrics often determine the contrastive signals in existing methods, this leads to the model performance aligning with the preferences of these metrics being limited by the evaluation capabilities of these metrics. Inspired by prior works that treat the evaluation of generated text as a text generation problem, we propose a generative evaluation-driven contrastive learning framework, which leverages the semantic understanding capabilities of the abstractive model itself to evaluate summary in reference-based settings. In this way, our method establishes a connection between the model’s reference-based evaluation and reference-free generation scenarios, allowing them to share the benefits of model capability enhancements. Extensive experiments on four summarization datasets demonstrate that our method outperforms the previous state-of-the-art regarding comprehensive performance. Various empirical analyses further substantiate the effectiveness of our method.
%U https://aclanthology.org/2024.lrec-main.670
%P 7581-7595
Markdown (Informal)
[GECSum: Generative Evaluation-Driven Sequence Level Contrastive Learning for Abstractive Summarization](https://aclanthology.org/2024.lrec-main.670) (Xie et al., LREC-COLING 2024)
ACL