% Conference paper (inproceedings) on authorship authentication of Arabic tweets,
% published in the AICCSA 2019 proceedings.
% The citation key is kept unchanged so that existing citations keep resolving.
% Proper nouns in the title are brace-protected so that styles which
% sentence-case titles do not lowercase them.
% NOTE(review): the address field holds a country rather than the publisher's
% city -- confirm the intended value before relying on it.
@inproceedings{d14e8e45843a4011918c3f929d41e5fd,
  author    = {Talafha, Bashar and Al-Smadi, Mohammad and Al-Ayyoub, Mahmoud and Jararweh, Yaser and Juola, Patrick},
  title     = {Using a hierarchical softmax based on the {Huffman} coding tree for authenticating {Arabic} tweets},
  booktitle = {16th ACS/IEEE International Conference on Computer Systems and Applications, AICCSA 2019},
  series    = {Proceedings of IEEE/ACS International Conference on Computer Systems and Applications, AICCSA},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2019},
  month     = nov,
  doi       = {10.1109/AICCSA47632.2019.9035303},
  language  = {English},
  keywords  = {Authorship authentication of Arabic tweets, Character-level linear classifier, Fasttext embeddings},
  abstract  = {Attributing a piece of text to its true author is called Authorship Authentication (AA). This work addresses the AA problem of Arabic tweets. Arabic language is both challenging and understudied. Existing approaches on authenticating Arabic tweets used bag of words features or Stylometric Features coupled with classifiers like SVM. However, the reported accuracy for these approaches is rather low and did not even reach 69\%. In this work, we address this problem using two approaches. (a) A baseline approach that uses SVM along with BoW features, and (b) a character-level linear classifier (char-LC) with a rank constraint and a fast loss approximation along with word embeddings based on fasttext. Both approaches give significantly higher accuracies than the results reported in literature with 78.28\% for the SVM along with BoW approach and 79.4\% for the char-LC.},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 16th ACS/IEEE International Conference on Computer Systems and Applications, AICCSA 2019 ; Conference date: 03-11-2019 Through 07-11-2019},
}