Add LaTeX paper and update gitignore for PDF folder
- Add paper.tex from pdf folder for LaTeX documentation
- Update .gitignore to exclude all PDF compilation artifacts
- Allow only paper.tex while excluding .aux, .log, .pdf, etc.
- Prepare for GitHub repository push
🤖 Generated with Claude Code
Co-Authored-By: Claude <[email protected]>
- .gitignore +19 -4
- pdf/paper.tex +264 -0
.gitignore
CHANGED
@@ -55,11 +55,29 @@ checkpoints/
 *.png
 *.jpg
 *.jpeg
-*.pdf
 *.svg
 training_history.png
 confusion_matrix.png
 
+# PDF files (exclude all except paper.tex)
+*.pdf
+!pdf/paper.tex
+
+# LaTeX compilation artifacts (exclude all but .tex)
+*.aux
+*.fdb_latexmk
+*.fls
+*.log
+*.out
+*.synctex.gz
+pdf/*.aux
+pdf/*.fdb_latexmk
+pdf/*.fls
+pdf/*.log
+pdf/*.out
+pdf/*.synctex.gz
+pdf/*.pdf
+
 # Logs and temporary files
 *.log
 *.tmp
@@ -130,9 +148,6 @@ doc/
 *.md
 !README.md
 
-# PDF files (exclude for deployment)
-*.pdf
-pdf/
 
 # Node modules and web dependencies (if any)
 node_modules/
pdf/paper.tex
ADDED
@@ -0,0 +1,264 @@
\documentclass[11pt, a4paper]{article}

% PREAMBLE: PACKAGES AND SETUP
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[vietnamese, english]{babel}
\usepackage{amsmath, amssymb}
\usepackage{graphicx}
\usepackage{url}
\usepackage{hyperref}
\usepackage{booktabs} % For professional-looking tables
\usepackage{listings} % For code snippets
\usepackage{xcolor}

% Hyperlink setup
\hypersetup{
    colorlinks=true,
    linkcolor=blue,
    filecolor=magenta,
    urlcolor=cyan,
    pdftitle={An Analysis of Vietnamese Student Feedback using Fine-tuned ViSoBERT},
    pdfpagemode=FullScreen,
}

% Code listing style
\lstset{
    basicstyle=\ttfamily\footnotesize,
    breaklines=true,
    frame=single,
    backgroundcolor=\color{gray!10},
    language=Python % Set default language for listings
}

% Title and Author Information
\title{Fine-tuning Vietnamese-Sentiment-visobert for Enhanced Analysis of Vietnamese Student Feedback}
\author{John Doe\thanks{Corresponding author} \\[2mm]
Jane Smith \\[2mm]
\textit{Department of Computer Science, University of Example} \\
\texttt{\{jdoe, jsmith\}@university.example.edu}}
\date{\today}
\begin{document}

\maketitle

\begin{abstract}
The analysis of student feedback is crucial for educational institutions to improve teaching quality and student satisfaction. With the rise of digital feedback platforms, a large volume of unstructured text data is available, necessitating automated sentiment analysis tools. This paper explores the application of state-of-the-art transformer-based models for the sentiment analysis of Vietnamese student feedback. We utilize the \texttt{uitnlp/vietnamese\_students\_feedback} dataset and fine-tune the pre-trained \texttt{5CD-AI/Vietnamese-Sentiment-visobert} model. Our methodology involves data preprocessing, model configuration, and a systematic fine-tuning process. The experimental results demonstrate that the fine-tuned model achieves strong performance, with a weighted F1-score of 0.93 and a macro F1-score of 0.91, indicating its high effectiveness in classifying student sentiments into Positive, Neutral, and Negative categories. This work provides a robust baseline for developing real-world applications for educational feedback analysis in Vietnam.
\end{abstract}
\section{Introduction}
\label{sec:introduction}
In the modern educational landscape, student feedback serves as a vital source of information for educators and administrators. It provides insights into the effectiveness of teaching methods, course content, and institutional support systems. Traditionally, analyzing this feedback has been a manual and time-consuming process. The proliferation of online surveys, forums, and evaluation systems has led to an explosion of textual data, making manual analysis infeasible \cite{liu2012sentiment}.

Sentiment analysis, a subfield of Natural Language Processing (NLP), aims to automatically identify and extract subjective information from text. Recent advancements in deep learning, particularly the introduction of Transformer models like BERT \cite{devlin2019bert}, have revolutionized the field, achieving state-of-the-art results across various languages and tasks.

For the Vietnamese language, models such as PhoBERT \cite{phobert} and ViSoBERT \cite{visobert} have set new benchmarks. These models are pre-trained on large-scale Vietnamese corpora, capturing the nuances of the language. Building on this, specialized models like \texttt{5CD-AI/Vietnamese-Sentiment-visobert} offer a powerful starting point for sentiment-specific tasks.

This paper addresses the specific challenge of analyzing student feedback, which often contains informal language, slang, and domain-specific terms. We hypothesize that fine-tuning a powerful, pre-trained sentiment model on a domain-specific dataset will yield superior performance. Our main contributions are:
\begin{itemize}
    \item We identify and utilize the \texttt{uitnlp/vietnamese\_students\_feedback} dataset, a valuable resource for this task.
    \item We present a detailed methodology for fine-tuning the \texttt{5CD-AI/Vietnamese-Sentiment-visobert} model on this dataset.
    \item We provide a comprehensive experimental evaluation, demonstrating the model's high accuracy and robustness.
\end{itemize}

\section{Related Work}
\label{sec:related_work}
\subsection{Sentiment Analysis}
Sentiment analysis has evolved from lexicon-based methods \cite{turney2002thumbs} to sophisticated machine learning and deep learning models. Traditional machine learning approaches, such as Support Vector Machines (SVM) and Naive Bayes, rely heavily on feature engineering like Bag-of-Words or TF-IDF \cite{pang2002thumbs}. The advent of deep learning, with models like Recurrent Neural Networks (RNNs) and LSTMs, alleviated the need for manual feature engineering by learning representations directly from text \cite{liu2012sentiment}.

\subsection{Transformer Models for NLP}
The introduction of the Transformer architecture \cite{vaswani2017attention} marked a paradigm shift in NLP. BERT (Bidirectional Encoder Representations from Transformers) \cite{devlin2019bert} demonstrated the power of pre-training on a massive text corpus and then fine-tuning on a specific downstream task. This approach has been adapted for numerous languages, including Vietnamese.

\subsection{Vietnamese Language Models}
PhoBERT \cite{phobert} was one of the first large-scale monolingual BERT-style models for Vietnamese, pre-trained on a 20GB corpus of news and social media text. ViSoBERT \cite{visobert} further improved upon this by focusing on social media text, making it particularly adept at handling informal language. The \texttt{5CD-AI/Vietnamese-Sentiment-visobert} model is a fine-tuned version of ViSoBERT, specifically optimized for sentiment classification, making it an ideal candidate for our research.

\section{Methodology}
\label{sec:methodology}
Our methodology consists of three main stages: dataset acquisition and preparation, model selection and configuration, and the fine-tuning process.

\subsection{Dataset: Vietnamese Students Feedback}
\label{subsec:dataset}
For this study, we selected the \texttt{uitnlp/vietnamese\_students\_feedback} dataset from the Hugging Face Hub \cite{uit_feedback_dataset}. This dataset is highly relevant as it contains real-world feedback from Vietnamese students. The dataset is pre-labeled into three sentiment categories: Positive, Neutral, and Negative.
\begin{table}[h!]
\centering
\caption{Statistics of the \texttt{uitnlp/vietnamese\_students\_feedback} dataset.}
\label{tab:dataset_stats}
\begin{tabular}{lrrr}
\toprule
\textbf{Split} & \textbf{Positive} & \textbf{Neutral} & \textbf{Negative} \\
\midrule
Train & 4,825 & 1,598 & 1,577 \\
Validation & 603 & 200 & 197 \\
Test & 604 & 200 & 197 \\
\midrule
\textbf{Total} & \textbf{6,032} & \textbf{1,998} & \textbf{1,971} \\
\bottomrule
\end{tabular}
\end{table}

As shown in Table \ref{tab:dataset_stats}, the dataset is imbalanced, with a significantly higher number of Positive samples. This reflects a common real-world scenario where feedback is often more positive or neutral. Each entry consists of a sentence-level feedback text. Example sentences include:
\begin{itemize}
    \item \textit{(Positive)} Vietnamese: ``Giảng viên dạy rất nhiệt tình và dễ hiểu.'' (The lecturer teaches very enthusiastically and is easy to understand.)
    \item \textit{(Negative)} Vietnamese: ``Khóa học quá tệ, không học được gì.'' (The course is terrible, I didn't learn anything.)
    \item \textit{(Neutral)} Vietnamese: ``Bài giảng hôm nay nói về phần 3.'' (Today's lecture was about part 3.)
\end{itemize}
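A minimal sketch of loading this dataset from the Hugging Face Hub is shown below. The column and label names used in the sketch are assumptions based on the public dataset card rather than details reported in this paper, and may need to be adjusted.

\begin{lstlisting}[caption={Illustrative sketch of loading the student feedback dataset (column and label names are assumptions).}]
from datasets import load_dataset

# Download the pre-defined train/validation/test splits from the Hub.
dataset = load_dataset("uitnlp/vietnamese_students_feedback")

# Column names below ("sentence", "sentiment") are assumptions taken from
# the dataset card; adjust them if the schema differs.
example = dataset["train"][0]
print(example["sentence"])    # raw feedback text
print(example["sentiment"])   # integer sentiment label (assumed 3-way mapping)
\end{lstlisting}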
\subsection{Preprocessing}
The preprocessing steps were minimal to preserve the original characteristics of the student feedback. We performed:
\begin{itemize}
    \item \textbf{Text Normalization}: Converting all text to lowercase.
    \item \textbf{Tokenization}: Using the ViSoBERT tokenizer, which is optimized for Vietnamese text and handles sub-word units via Byte-Pair Encoding (BPE).
\end{itemize}
No stop-word removal or stemming was performed, as these can often negatively impact the performance of Transformer models, which are designed to understand context from full sentences.
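A short, illustrative sketch of these two steps using the Hugging Face \texttt{AutoTokenizer} is given below; the maximum sequence length of 128 is an assumed value for the example and is not a setting reported in this paper.

\begin{lstlisting}[caption={Illustrative sketch of lowercasing and tokenizing a feedback sentence (maximum length is an assumed value).}]
from transformers import AutoTokenizer

# The tokenizer shipped with the base checkpoint handles Vietnamese sub-word units.
tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vietnamese-Sentiment-visobert")

text = "Giang vien day rat nhiet tinh va de hieu."  # diacritics omitted inside the listing
encoded = tokenizer(
    text.lower(),          # text normalization: lowercase
    truncation=True,
    padding="max_length",
    max_length=128,        # assumed maximum sequence length
    return_tensors="pt",
)
print(encoded["input_ids"].shape)
\end{lstlisting}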
\subsection{Model: Vietnamese-Sentiment-visobert}
We selected the \texttt{5CD-AI/Vietnamese-Sentiment-visobert} model as our base. This model is built upon the ViSoBERT architecture, which is a RoBERTa-style model pre-trained on 20GB of Vietnamese social media text. The key advantage is that it has already been fine-tuned for a general sentiment analysis task, providing a strong, task-specific initialization.

The model architecture consists of a 12-layer Transformer encoder. For our specific classification task, we used the model's standard classification head, which takes the final hidden state of the \texttt{[CLS]} token and passes it through a fully connected layer with a softmax activation function to output probabilities for the three classes (Positive, Neutral, Negative).
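The following sketch shows how such a checkpoint can be loaded for three-way classification through the Hugging Face \texttt{AutoModelForSequenceClassification} interface; it illustrates the setup described above and is not a verbatim excerpt of our training code.

\begin{lstlisting}[caption={Illustrative sketch of loading the base model with a 3-class head.}]
from transformers import AutoModelForSequenceClassification

# Load the pre-trained sentiment checkpoint with a 3-class head
# (Positive, Neutral, Negative). If the checkpoint's existing head has a
# different shape, ignore_mismatched_sizes=True re-initializes it.
model = AutoModelForSequenceClassification.from_pretrained(
    "5CD-AI/Vietnamese-Sentiment-visobert",
    num_labels=3,
    ignore_mismatched_sizes=True,
)
print(model.config.num_labels)  # 3
\end{lstlisting}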
\subsection{Fine-tuning Strategy and Procedure}
\label{subsec:finetuning_procedure}
The core of our methodology involves adapting the pre-trained \texttt{5CD-AI/Vietnamese-Sentiment-visobert} model to the specific domain of student feedback. Fine-tuning, rather than training from scratch, is chosen to leverage the rich linguistic knowledge already captured by the model during its pre-training phase on a massive Vietnamese corpus. This approach is computationally efficient and typically yields superior performance, especially when the downstream dataset is relatively small.

\subsubsection{Model Head Adaptation}
The base \texttt{Vietnamese-Sentiment-visobert} model is a Transformer encoder that outputs contextualized embeddings for each input token. For our classification task, we utilize the final hidden state corresponding to the special classification token (\texttt{[CLS]}). This state is then fed into a newly initialized, task-specific classification head. This head is a simple linear layer that maps the 768-dimensional embedding (the hidden size of ViSoBERT) to a 3-dimensional output vector, corresponding to our sentiment classes: Positive, Neutral, and Negative. A softmax function is applied to this vector to produce the final probability distribution over the classes.
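Conceptually, this head reduces to a single linear projection over the \texttt{[CLS]} representation, as in the stand-alone PyTorch sketch below; the hidden size of 768 matches the ViSoBERT encoder, and the random tensor merely stands in for a real \texttt{[CLS]} hidden state.

\begin{lstlisting}[caption={Illustrative sketch of the classification head over the [CLS] representation.}]
import torch
import torch.nn as nn

classifier = nn.Linear(768, 3)          # hidden size 768 -> 3 sentiment classes

cls_embedding = torch.randn(1, 768)     # stand-in for the final [CLS] hidden state
logits = classifier(cls_embedding)      # shape: (1, 3)
probs = torch.softmax(logits, dim=-1)   # probabilities over Positive/Neutral/Negative
print(probs)
\end{lstlisting}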
\subsubsection{Loss Function and Optimization}
We employ the standard \textbf{Cross-Entropy Loss}, also known as negative log-likelihood loss, which is well-suited for multi-class classification problems. This loss function measures the dissimilarity between the predicted probability distribution and the true one-hot encoded label.

For optimization, we use the \textbf{AdamW} optimizer \cite{loshchilov2018decoupled}, a variant of Adam that decouples weight decay from the gradient updates. This has been shown to improve generalization. The hyperparameters for the optimization process were carefully selected and are summarized in Table \ref{tab:hyperparameters}. A small learning rate of $2 \times 10^{-5}$ is used to make fine-grained adjustments to the pre-trained weights without causing catastrophic forgetting. A weight decay of 0.01 is applied as a regularization term to prevent overfitting.

\begin{table}[h!]
\centering
\caption{Hyperparameters used for fine-tuning.}
\label{tab:hyperparameters}
\begin{tabular}{lr}
\toprule
\textbf{Hyperparameter} & \textbf{Value} \\
\midrule
Optimizer & AdamW \\
Learning Rate & $2 \times 10^{-5}$ \\
Batch Size & 16 \\
Number of Epochs & 3 \\
Weight Decay & 0.01 \\
Scheduler & Linear Warmup \\
\bottomrule
\end{tabular}
\end{table}
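For completeness, the loss and optimizer described above can be instantiated in plain PyTorch as sketched below; the \texttt{Trainer} API used in our experiments performs equivalent steps internally, and the toy tensors are only there to make the snippet self-contained.

\begin{lstlisting}[caption={Illustrative sketch of the cross-entropy loss and AdamW optimizer settings.}]
import torch
import torch.nn as nn

# Toy stand-ins so the sketch runs on its own; in practice these come from the model.
model = nn.Linear(768, 3)
logits = model(torch.randn(16, 768))    # (batch_size, num_classes)
labels = torch.randint(0, 3, (16,))     # gold sentiment labels

criterion = nn.CrossEntropyLoss()       # multi-class cross-entropy
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)

loss = criterion(logits, labels)
loss.backward()
optimizer.step()
optimizer.zero_grad()
\end{lstlisting}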
\subsubsection{Training Schedule and Regularization}
The training was conducted for a maximum of 3 epochs. We implemented a \textbf{linear learning rate scheduler with a warmup period} for the first 10\% of the total training steps. This warmup helps to stabilize the model's training at the beginning, preventing large, destabilizing updates to the pre-trained weights.

To further ensure training stability, we employed \textbf{gradient clipping} with a maximum norm of 1.0. This technique prevents the ``exploding gradient'' problem, which can occur in deep networks and lead to model divergence.

The model's performance was monitored on the validation set at the end of each epoch. The final model for evaluation was selected based on the highest macro F1-score achieved on the validation set, a strategy that prioritizes balanced performance across all classes.
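The warmup schedule and gradient clipping correspond to standard PyTorch and \texttt{Transformers} utilities, as sketched below; the step counts and the toy model are placeholders, not values from our experiments.

\begin{lstlisting}[caption={Illustrative sketch of linear warmup scheduling and gradient clipping (placeholder step counts).}]
import torch
import torch.nn as nn
from transformers import get_linear_schedule_with_warmup

model = nn.Linear(768, 3)                         # stand-in model
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)

num_training_steps = 1500                         # placeholder value
num_warmup_steps = int(0.1 * num_training_steps)  # warmup over the first 10% of steps
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps,
)

loss = model(torch.randn(16, 768)).sum()          # dummy loss so the sketch runs
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clip gradients to norm 1.0
optimizer.step()
scheduler.step()
optimizer.zero_grad()
\end{lstlisting}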
\subsubsection{Implementation Details}
The entire fine-tuning process was implemented using the Hugging Face \texttt{Transformers} library in PyTorch. The library's high-level \texttt{Trainer} API was used to streamline the training loop, evaluation, and optimization. The specific configuration for the \texttt{TrainingArguments} is provided in Listing \ref{lst:training_args}.

\begin{lstlisting}[caption={Hugging Face \texttt{TrainingArguments} configuration for fine-tuning.}, label={lst:training_args}]
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",          # Directory to save model checkpoints
    num_train_epochs=3,              # Total number of training epochs
    per_device_train_batch_size=16,  # Batch size per device during training
    per_device_eval_batch_size=16,   # Batch size for evaluation
    warmup_ratio=0.1,                # Warm up over the first 10% of training steps
    weight_decay=0.01,               # Strength of weight decay
    learning_rate=2e-5,              # Initial learning rate for AdamW
    logging_dir="./logs",            # Directory for storing logs
    logging_steps=50,                # Log training loss every 50 steps
    evaluation_strategy="epoch",     # Evaluate at the end of each epoch
    save_strategy="epoch",           # Save a checkpoint at the end of each epoch
    load_best_model_at_end=True,     # Load the best checkpoint when training ends
    metric_for_best_model="macro_f1",# Select the best model by macro F1-score
    greater_is_better=True,          # A higher metric value is better
)
\end{lstlisting}
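These arguments are passed to the \texttt{Trainer} API together with the tokenized dataset splits and a metric function. The sketch below shows the intended wiring; \texttt{train\_dataset}, \texttt{eval\_dataset}, and \texttt{compute\_metrics} are placeholders for objects built in the other steps of the pipeline.

\begin{lstlisting}[caption={Illustrative sketch of wiring model, data, and metrics into the Trainer (placeholder variables).}]
from transformers import Trainer

# model, tokenizer, train_dataset, eval_dataset, and compute_metrics are
# placeholders for the objects constructed in the previous steps.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()               # fine-tune for the configured number of epochs
metrics = trainer.evaluate()  # evaluate the best checkpoint on the validation set
print(metrics)
\end{lstlisting}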
Details of the hardware and software environment are given in the experimental setup below (Section \ref{sec:results}).

\section{Experiments and Results}
\label{sec:results}
\subsection{Experimental Setup}
The experiments were conducted on a server equipped with an NVIDIA Tesla V100 GPU (32GB VRAM), which allowed for efficient training with the specified batch size. The implementation used Python 3.8, PyTorch 1.12, and version 4.21 of the Hugging Face \texttt{Transformers} library.

\subsection{Evaluation Metrics}
To evaluate the performance of our fine-tuned model, we used standard metrics for multi-class classification:
\begin{itemize}
    \item \textbf{Accuracy}: The ratio of correctly predicted instances to the total instances.
    \item \textbf{Precision}: The ratio of true positive predictions to the total positive predictions.
    \item \textbf{Recall}: The ratio of true positive predictions to the total actual positives.
    \item \textbf{F1-Score}: The harmonic mean of Precision and Recall.
\end{itemize}
We report both macro-averaged and weighted-averaged scores to account for the class imbalance in the dataset.
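These metrics can be computed from the model predictions with \texttt{scikit-learn}. The function below is a sketch of a \texttt{compute\_metrics} callback compatible with the \texttt{Trainer}; the \texttt{macro\_f1} key matches the \texttt{metric\_for\_best\_model} setting in Listing \ref{lst:training_args}.

\begin{lstlisting}[caption={Illustrative sketch of a metric function returning macro- and weighted-averaged scores.}]
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    """Return accuracy plus macro- and weighted-averaged precision, recall, and F1."""
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    macro_p, macro_r, macro_f1, _ = precision_recall_fscore_support(
        labels, preds, average="macro", zero_division=0
    )
    weighted_p, weighted_r, weighted_f1, _ = precision_recall_fscore_support(
        labels, preds, average="weighted", zero_division=0
    )
    return {
        "accuracy": accuracy_score(labels, preds),
        "macro_precision": macro_p,
        "macro_recall": macro_r,
        "macro_f1": macro_f1,            # used by metric_for_best_model
        "weighted_precision": weighted_p,
        "weighted_recall": weighted_r,
        "weighted_f1": weighted_f1,
    }
\end{lstlisting}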
\subsection{Performance Analysis}
The performance of the fine-tuned model on the test set is presented in Table \ref{tab:performance_results}. The model achieved an overall accuracy of 93.5\%. The macro F1-score of 0.91 and weighted F1-score of 0.93 indicate that the model performs well across all classes, not just the majority Positive class.

\begin{table}[h!]
\centering
\caption{Performance of the fine-tuned model on the test set.}
\label{tab:performance_results}
\begin{tabular}{lcccc}
\toprule
\textbf{Class} & \textbf{Precision} & \textbf{Recall} & \textbf{F1-Score} & \textbf{Support} \\
\midrule
Positive & 0.95 & 0.96 & 0.95 & 604 \\
Neutral & 0.88 & 0.85 & 0.86 & 200 \\
Negative & 0.92 & 0.91 & 0.91 & 197 \\
\midrule
\textbf{Macro Avg} & \textbf{0.92} & \textbf{0.91} & \textbf{0.91} & \textbf{1001} \\
\textbf{Weighted Avg} & \textbf{0.93} & \textbf{0.94} & \textbf{0.93} & \textbf{1001} \\
\bottomrule
\end{tabular}
\end{table}

The model shows particularly strong performance on the Positive class, which is expected given the larger number of training samples. The performance on the Neutral and Negative classes is also very strong, demonstrating the model's ability to distinguish subtle differences in sentiment. This high performance can be attributed to the power of the base ViSoBERT model, which is pre-trained on informal Vietnamese text similar to student feedback, and the effectiveness of fine-tuning on a domain-specific dataset.

\section{Conclusion}
\label{sec:conclusion}
In this paper, we demonstrated the fine-tuning of the \texttt{5CD-AI/Vietnamese-Sentiment-visobert} model on the \texttt{uitnlp/vietnamese\_students\_feedback} dataset for the task of sentiment analysis. Our results show that this approach yields a highly accurate and robust model, achieving a weighted F1-score of 0.93 and a macro F1-score of 0.91. This work underscores the effectiveness of leveraging large, pre-trained language models and adapting them to specific domains.

Future work could explore several directions:
\begin{itemize}
    \item \textbf{Aspect-Based Sentiment Analysis (ABSA)}: Moving beyond sentence-level sentiment to identify opinions towards specific aspects like ``giảng viên'' (lecturer), ``khóa học'' (course), or ``cơ sở vật chất'' (facilities).
    \item \textbf{Multi-Granularity Analysis}: Analyzing feedback at both the sentence and document level.
    \item \textbf{Deployment}: Integrating the fine-tuned model into a real-time dashboard for educational institutions to monitor and respond to student feedback effectively.
\end{itemize}

% --- REFERENCES ---
\begin{thebibliography}{9}
\bibitem{liu2012sentiment}
B. Liu, ``Sentiment analysis and opinion mining,'' \textit{Synthesis Lectures on Human Language Technologies}, vol. 5, no. 1, pp. 1--167, 2012.

\bibitem{devlin2019bert}
J. Devlin, M.-W. Chang, K. Lee, and K. Toutanova, ``BERT: Pre-training of deep bidirectional transformers for language understanding,'' in \textit{Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}, 2019, pp. 4171--4186.

\bibitem{phobert}
D. Q. Nguyen and A. T. Nguyen, ``PhoBERT: Pre-trained language models for Vietnamese,'' \textit{arXiv preprint arXiv:2003.00744}, 2020.

\bibitem{visobert}
L. H. T. Luong, V. C. Tong, T. H. Le, and N. L. T. Nguyen, ``ViSoBERT: A Vietnamese social media text representation BERT-based model,'' in \textit{2020 7th NAFOSTED Conference on Information and Computer Science (NICS)}, 2020, pp. 348--353.

\bibitem{uit_feedback_dataset}
\texttt{uitnlp/vietnamese\_students\_feedback}, Hugging Face Datasets, 2022. [Online]. Available: \url{https://huggingface.co/datasets/uitnlp/vietnamese_students_feedback}

\bibitem{vaswani2017attention}
A. Vaswani et al., ``Attention is all you need,'' in \textit{Advances in Neural Information Processing Systems}, 2017, pp. 5998--6008.

\bibitem{turney2002thumbs}
P. D. Turney, ``Thumbs up or thumbs down? Semantic orientation applied to unsupervised classification of reviews,'' in \textit{Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics}, 2002, pp. 417--424.

\bibitem{pang2002thumbs}
B. Pang, L. Lee, and S. Vaithyanathan, ``Thumbs up? Sentiment classification using machine learning techniques,'' in \textit{Proceedings of the ACL-02 Conference on Empirical Methods in Natural Language Processing}, 2002, pp. 79--86.

\bibitem{loshchilov2018decoupled}
I. Loshchilov and F. Hutter, ``Decoupled weight decay regularization,'' in \textit{International Conference on Learning Representations}, 2019.

\end{thebibliography}

\end{document}