-
Notifications
You must be signed in to change notification settings - Fork 5
/
Daneshvar2018.bib
11 lines (11 loc) · 1.34 KB
/
Daneshvar2018.bib
1
2
3
4
5
6
7
8
9
10
11
@comment{
  Conference paper by Daneshvar and Inkpen on gender identification in
  Twitter (notebook for PAN at CLEF 2018), CEUR volume 2125.
  NOTE(review): booktitle currently holds what looks like the CEUR series
  name rather than the title of the proceedings volume. Once the volume
  title is confirmed, put it in booktitle and move "CEUR Workshop
  Proceedings" to a series field.
}
@inproceedings{Daneshvar2018,
  author    = {Daneshvar, Saman and Inkpen, Diana},
  title     = {Gender Identification in {Twitter} using {N-grams} and {LSA}: Notebook for {PAN} at {CLEF} 2018},
  booktitle = {CEUR Workshop Proceedings},
  volume    = {2125},
  year      = {2018},
  issn      = {1613-0073},
  url       = {http://ceur-ws.org/Vol-2125/paper_213.pdf},
  keywords  = {Author profiling,Gender detection,Natural language processing,Social media,Twitter,User modeling},
  abstract  = {In this paper, we describe the participation of the Natural Language Processing Lab of the University of Ottawa in the author profiling shared task at PAN 2018. We present our approach to gender identification in Twitter performed on the tweet corpus provided by CLEF for the task. Our approach takes advantage of textual information solely, and consists of tweet preprocessing, feature construction, dimensionality reduction using Latent Semantic Analysis (LSA), and classification model construction. We propose a linear Support Vector Machine (SVM) classifier, with different types of word and character n-grams as features. Our model was the best-performing model in textual classification, with the accuracy of 0.8221, 0.82, and 0.809 on the English, Spanish, and Arabic datasets respectively. Considering the combination of textual and image classification, and all three datasets, our model ranked second in the task.},
}