% Conference paper (AICCSA 2017): transliteration as a blocking-key step for entity resolution on Arabic data.
% NOTE(review): month/day (2 Jul) do not match the conference dates recorded in the note field
%   (30 Oct to 3 Nov 2017); presumably an export-supplied publication date -- confirm against the publisher record.
% NOTE(review): the address field holds a country rather than the publisher's city -- refine if the city is known.
@inproceedings{c72d028408114a7e919b5d4f2153baa7,
  author    = {Alian, Marwah and Al-Naymat, Ghazi and Ramadan, Banda},
  title     = {Using transliteration with entity resolution for {Arabic} datasets},
  booktitle = {Proceedings - 2017 IEEE/ACS 14th International Conference on Computer Systems and Applications, AICCSA 2017},
  series    = {Proceedings of IEEE/ACS International Conference on Computer Systems and Applications, AICCSA},
  pages     = {593--597},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2017},
  month     = jul,
  day       = {2},
  doi       = {10.1109/AICCSA.2017.11},
  language  = {English},
  keywords  = {Arabic Dataset, Entity Resolution, Indexing, Stemming, Transliteration},
  abstract  = {Entity resolution (ER) is the operation of distinguishing records that return to the same real world entity. It is used to link records among datasets and to match query records in real-time with existing datasets. Indexing is a major step in the ER process that reduces the search space. Most existing indexing techniques that are utilized in the ER process are designed to work with English datasets. Such techniques may not be suitable for use with other languages, such as Arabic. In this paper, enhancement for indexing techniques that are designed to work with English datasets has been proposed to be used with Arabic language by applying transliteration on Arabic strings before performing the indexing step of the ER process. The proposed approach is experimented and compared with using word stems as blocking keys in the indexing step. The results show better matching accuracy for the use of transliteration over the use of words stems.},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 14th IEEE/ACS International Conference on Computer Systems and Applications, AICCSA 2017 ; Conference date: 30-10-2017 Through 03-11-2017},
}