@inproceedings{lee-etal-2024-superst-superficial,
title = "{S}uper{ST}: Superficial Self-Training for Few-Shot Text Classification",
author = "Lee, Ju-Hyoung and
Hahn, Joonghyuk and
Seo, Hyeon-Tae and
Park, Jiho and
Han, Yo-Sub",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1341",
pages = "15436--15447",
abstract = "In few-shot text classification, self-training is a popular tool in semi-supervised learning (SSL). It relies on pseudo-labels to expand data, which has demonstrated success. However, these pseudo-labels contain potential noise and provoke a risk of underfitting the decision boundary. While the pseudo-labeled data can indeed be noisy, fully acquiring this flawed data can result in the accumulation of further noise and eventually impacting the model performance. Consequently, self-training presents a challenge: mitigating the accumulation of noise in the pseudo-labels. Confronting this challenge, we introduce superficial learning, inspired by pedagogy{'}s focus on essential knowledge. Superficial learning in pedagogy is a learning scheme that only learns the material {`}at some extent{'}, not fully understanding the material. This approach is usually avoided in education but counter-intuitively in our context, we employ superficial learning to acquire only the necessary context from noisy data, effectively avoiding the noise. This concept serves as the foundation for SuperST, our self-training framework. SuperST applies superficial learning to the noisy data and fine-tuning to the less noisy data, creating an efficient learning cycle that prevents overfitting to the noise and spans the decision boundary effectively. Notably, SuperST improves the classifier accuracy for few-shot text classification by 18.5{\%} at most and 8{\%} in average, compared with the state-of-the-art SSL baselines. We substantiate our claim through empirical experiments and decision boundary analysis.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-etal-2024-superst-superficial">
<titleInfo>
<title>SuperST: Superficial Self-Training for Few-Shot Text Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ju-Hyoung</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joonghyuk</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyeon-Tae</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiho</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yo-Sub</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In few-shot text classification, self-training is a popular tool in semi-supervised learning (SSL). It relies on pseudo-labels to expand data, which has demonstrated success. However, these pseudo-labels contain potential noise and provoke a risk of underfitting the decision boundary. While the pseudo-labeled data can indeed be noisy, fully acquiring this flawed data can result in the accumulation of further noise and eventually impacting the model performance. Consequently, self-training presents a challenge: mitigating the accumulation of noise in the pseudo-labels. Confronting this challenge, we introduce superficial learning, inspired by pedagogy’s focus on essential knowledge. Superficial learning in pedagogy is a learning scheme that only learns the material ‘at some extent’, not fully understanding the material. This approach is usually avoided in education but counter-intuitively in our context, we employ superficial learning to acquire only the necessary context from noisy data, effectively avoiding the noise. This concept serves as the foundation for SuperST, our self-training framework. SuperST applies superficial learning to the noisy data and fine-tuning to the less noisy data, creating an efficient learning cycle that prevents overfitting to the noise and spans the decision boundary effectively. Notably, SuperST improves the classifier accuracy for few-shot text classification by 18.5% at most and 8% in average, compared with the state-of-the-art SSL baselines. We substantiate our claim through empirical experiments and decision boundary analysis.</abstract>
<identifier type="citekey">lee-etal-2024-superst-superficial</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1341</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>15436</start>
<end>15447</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SuperST: Superficial Self-Training for Few-Shot Text Classification
%A Lee, Ju-Hyoung
%A Hahn, Joonghyuk
%A Seo, Hyeon-Tae
%A Park, Jiho
%A Han, Yo-Sub
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F lee-etal-2024-superst-superficial
%X In few-shot text classification, self-training is a popular tool in semi-supervised learning (SSL). It relies on pseudo-labels to expand data, which has demonstrated success. However, these pseudo-labels contain potential noise and provoke a risk of underfitting the decision boundary. While the pseudo-labeled data can indeed be noisy, fully acquiring this flawed data can result in the accumulation of further noise and eventually impacting the model performance. Consequently, self-training presents a challenge: mitigating the accumulation of noise in the pseudo-labels. Confronting this challenge, we introduce superficial learning, inspired by pedagogy’s focus on essential knowledge. Superficial learning in pedagogy is a learning scheme that only learns the material ‘at some extent’, not fully understanding the material. This approach is usually avoided in education but counter-intuitively in our context, we employ superficial learning to acquire only the necessary context from noisy data, effectively avoiding the noise. This concept serves as the foundation for SuperST, our self-training framework. SuperST applies superficial learning to the noisy data and fine-tuning to the less noisy data, creating an efficient learning cycle that prevents overfitting to the noise and spans the decision boundary effectively. Notably, SuperST improves the classifier accuracy for few-shot text classification by 18.5% at most and 8% in average, compared with the state-of-the-art SSL baselines. We substantiate our claim through empirical experiments and decision boundary analysis.
%U https://aclanthology.org/2024.lrec-main.1341
%P 15436-15447
Markdown (Informal)
[SuperST: Superficial Self-Training for Few-Shot Text Classification](https://aclanthology.org/2024.lrec-main.1341) (Lee et al., LREC-COLING 2024)
ACL
- Ju-Hyoung Lee, Joonghyuk Hahn, Hyeon-Tae Seo, Jiho Park, and Yo-Sub Han. 2024. SuperST: Superficial Self-Training for Few-Shot Text Classification. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 15436–15447, Torino, Italia. ELRA and ICCL.