% Conference paper (PAKDD 2006) published in an edited LNCS proceedings volume.
% Typed as @inproceedings so that booktitle/editor/series are used by the style;
% names are stored in "Last, First" form.
@inproceedings{1ced0b9bb1c9454ab80016f5dca1cccd,
  author    = {Saad, F. and {de la Iglesia}, B. and Bell, G. D.},
  title     = {Comparison of Documents Classification Techniques to Classify Medical Reports},
  editor    = {Ng, Wee-Keong and Kitsuregawa, Masaru and Li, Jianzhong and Chang, Kuiyu},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  series    = {Lecture Notes in Computer Science},
  volume    = {3918},
  pages     = {285--291},
  publisher = {Springer Berlin / Heidelberg},
  year      = {2006},
  isbn      = {978-3-540-33206-0},
  doi       = {10.1007/11731139_34},
  language  = {English},
  note      = {Proceedings of the 10th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD 2006).},
  abstract  = {This paper addresses a real world problem: the classification of text documents in the medical domain. There are a number of approaches to classifying text documents. Here, we use a partially supervised classification approach and argue that it is effective and computationally efficient for real-world problems. The approach uses a two-step strategy to cut down on the effort required to label each document for classification. Only a small set of positive documents are labeled initially, with others being labeled automatically as a result of the first step. The second step builds the actual text classifier. There are a number of methods that have been proposed for each step. A comprehensive evaluation of various combinations of methods is conducted to compare their performances using real world medical documents. The results show that using EM based methods to build the classifier yields better results than SVM. We also experimentally show that careful selection of a subset of features to represent the documents can improve the performance of the classifiers.},
}