% Conference paper (AICCSA 2019) on similarity detection between Arabic questions.
% Author names use the unambiguous "Last, First" form. The citation key is left
% unchanged so that existing citations keep resolving.
@inproceedings{1ac0a4f140404080834c093d923ee7e0,
  author    = {Al-Asa'd, Muntaha and Al-Khdour, Nour and Younes, Mutaz Bni and Khwaileh, Enas and Hammad, Mahmoud and Al-Smadi, Mohammad},
  title     = {Question to question similarity analysis using morphological, syntactic, semantic, and lexical features},
  booktitle = {16th {ACS/IEEE} International Conference on Computer Systems and Applications, {AICCSA} 2019},
  series    = {Proceedings of IEEE/ACS International Conference on Computer Systems and Applications, AICCSA},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2019},
  month     = nov,
  doi       = {10.1109/AICCSA47632.2019.9035248},
  language  = {English},
  keywords  = {Arabic Language, Lexical Features, ML, NLP, Random Forest, SVM, Semantic Text Similarity (STS), Text Classification, XGBoost},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 16th ACS/IEEE International Conference on Computer Systems and Applications, AICCSA 2019 ; Conference date: 03-11-2019 Through 07-11-2019},
  abstract  = {In the digitally connected world that we are living in, people expect to get answers to their questions spontaneously. This fact increased the burden on the Question/Answer platforms such as Stack Overflow and many others. A promising solution to this problem is to detect if a question being asked similar to a question in the database and present the answer of the detected question to the user. To address this challenge, we propose a novel Natural Language Processing (NLP) approach that detects if two Arabic questions are similar or not using their extracted morphological, syntactic, semantic, lexical features. Our approach involves several phases including Arabic text processing, novel feature extraction, and text classifications. To conduct our experiments, we used a real-world questions dataset consisting of 4,000 pairs of Arabic questions in which our approach achieved 78.2\% accuracy using XGBoost model on the best features selected by the Random Forest feature selection technique. This high accuracy shows the ability of our approach to correctly detect the similarity between two Arabic questions.},
}