NLTK ERROR TUTORIAL Creating a module for Sentiment Analysis with NLTK
Hi everyone! I am new to NLTK and I have been following the tutorial. I want to build a sentiment analyzer based on Amazon reviews. Following the steps of part 19, "Creating a module for Sentiment Analysis with NLTK", I run into a problem when I try to analyze the sentiment of a new text: the VoteClassifier predicts every new text as positive.
Can somebody help me?
Thank you in advance!
Here is my code:
#PART I: Training the classifiers
import nltk import random from nltk.classify.scikitlearn import SklearnClassifier import pickle from sklearn.naive_bayes import MultinomialNB, BernoulliNB from sklearn.linear_model import LogisticRegression, SGDClassifier from sklearn.svm import SVC, LinearSVC, NuSVC from nltk.classify import ClassifierI from statistics import mode from nltk.tokenize import word_tokenize
class VoteClassifier(ClassifierI):
    """Ensemble classifier that labels input by majority vote.

    Every wrapped classifier must expose ``classify(features)``; each one
    casts a single vote per call.
    """

    def __init__(self, *classifiers):
        """Store the classifiers whose votes are combined."""
        self._classifiers = classifiers

    def _collect_votes(self, features):
        """Return the label predicted by each wrapped classifier, in order."""
        return [c.classify(features) for c in self._classifiers]

    def classify(self, features):
        """Return the most common label among the wrapped classifiers' votes.

        Raises:
            statistics.StatisticsError: if no classifiers were supplied (and,
                on Python older than 3.8, if the vote is tied).
        """
        return mode(self._collect_votes(features))

    def confidence(self, features):
        """Return the fraction of classifiers that voted for the winning label.

        The result is a float in (0, 1]; 1.0 means the vote was unanimous.

        Raises:
            statistics.StatisticsError: if no classifiers were supplied (and,
                on Python older than 3.8, if the vote is tied).
        """
        votes = self._collect_votes(features)
        winner = mode(votes)
        # Previously the votes were gathered but nothing was returned, so
        # callers always received None instead of a confidence value.
        return votes.count(winner) / len(votes)
# Part-of-speech tag prefixes to keep: "J" is adjective, "R" is adverb,
# and "V" is verb.
# To keep adverbs and verbs as well: allowed_word_types = ["J", "R", "V"]
allowed_word_types = ["J"]

# Positive reviews are processed first, then negative ones, so the order of
# entries appended to `documents` and `all_words` matches the two original
# back-to-back loops.
for review_text, label in ((short_pos, "pos"), (short_neg, "neg")):
    # Split on the newline character. The code previously split on the
    # letter "n" (the backslash of "\n" was presumably lost), which chopped
    # the text at every "n" instead of at line boundaries.
    for p in review_text.split("\n"):
        documents.append((p, label))
        words = word_tokenize(p)
        pos = nltk.pos_tag(words)
        for word, tag in pos:
            # Compare only the first character of the tag; slicing (rather
            # than indexing) avoids an IndexError on an empty tag.
            if tag[:1] in allowed_word_types:
                all_words.append(word.lower())
#PART II: CREATING THE SENTIMENT ANALYZER (where I think I have the problem)
#File: sentiment_mod.py
import nltk import random from nltk.classify.scikitlearn import SklearnClassifier import pickle from sklearn.naive_bayes import MultinomialNB, BernoulliNB from sklearn.linear_model import LogisticRegression, SGDClassifier from sklearn.svm import SVC, LinearSVC, NuSVC from nltk.classify import ClassifierI from statistics import mode from nltk.tokenize import word_tokenize
class VoteClassifier(ClassifierI):
    """Ensemble classifier that labels input by majority vote.

    Every wrapped classifier must expose ``classify(features)``; each one
    casts a single vote per call.
    """

    def __init__(self, *classifiers):
        """Store the classifiers whose votes are combined."""
        self._classifiers = classifiers

    def _collect_votes(self, features):
        """Return the label predicted by each wrapped classifier, in order."""
        return [c.classify(features) for c in self._classifiers]

    def classify(self, features):
        """Return the most common label among the wrapped classifiers' votes.

        Raises:
            statistics.StatisticsError: if no classifiers were supplied (and,
                on Python older than 3.8, if the vote is tied).
        """
        return mode(self._collect_votes(features))

    def confidence(self, features):
        """Return the fraction of classifiers that voted for the winning label.

        The result is a float in (0, 1]; 1.0 means the vote was unanimous.

        Raises:
            statistics.StatisticsError: if no classifiers were supplied (and,
                on Python older than 3.8, if the vote is tied).
        """
        votes = self._collect_votes(features)
        winner = mode(votes)
        # Previously the votes were gathered but nothing was returned, so
        # callers always received None instead of a confidence value.
        return votes.count(winner) / len(votes)