Skip to content
This repository was archived by the owner on Apr 23, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,15 @@ examples/repindex_sentiment.py
# logs
*.log
examples/results/

textlytics/data/w2v_models/word2vec_lexical_dico\.txt

textlytics/data/w2v_models/word2vec_lexical\.txt

textlytics/data/w2v_models/word2vec_pos_dico\.txt

textlytics/data/w2v_models/word2vec_pos\.txt

textlytics/data/w2v_models/word2vec_smiley_dico\.txt

textlytics/data/w2v_models/word2vec_smiley\.txt
2 changes: 1 addition & 1 deletion examples/amazon_w2v_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from glob import glob
from os.path import join

import textlytics.word_vectorization.amazon_w2v as aw2v
import textlytics.data.word_vectorization.amazon_w2v as aw2v

amazon_path = '/datasets/amazon-data/new-julian/domains'
output_path = '/datasets/amazon-data/new-julian/domains/word_vectorization-models-overall'
Expand Down
2 changes: 1 addition & 1 deletion examples/doc2vec_example.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-

import textlytics.word_vectorization.doc2vec_sentiment as d2v
import textlytics.data.word_vectorization.doc2vec_sentiment as d2v

d2v.run()
43 changes: 9 additions & 34 deletions examples/experiments_code/timik/sentiment_messages.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
# coding: utf-8
from __future__ import print_function

import pickle
import multiprocessing
import logging
import sys
import multiprocessing
import pickle

import pandas as pd
import numpy as np

from __future__ import print_function

from sklearn.cross_validation import StratifiedKFold
import pandas as pd
import sys
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.metrics import f1_score, accuracy_score, recall_score, \
precision_score, confusion_matrix
from sklearn.svm import LinearSVC

logging.basicConfig(filename='sentiment-timik.log', level=logging.DEBUG)
log = logging.getLogger()
Expand Down Expand Up @@ -82,27 +78,7 @@
# print('Negative emoticons: {}'.format(emoticons_negative))


# In[21]:

# m_sent.groupby('sentiment').describe()


# In[18]:

# df.emoticons.unique()


# In[20]:

# df.groupby(['emoticons']).count()


# # Sentiment analysis based on characters

# In[27]:

def superv_sent(docs, y, result_queue, ngram_range=(1, 1), analyzer='char_wb',
n_folds=10):
def superv_sent(docs, y, result_queue, ngram_range=(1, 1), analyzer='char_wb', n_folds=10):
logging.info('Analyzer: {}'.format(analyzer))
logging.info('ngram_range: {}'.format(ngram_range))
vectorizer = CountVectorizer(analyzer=analyzer, ngram_range=ngram_range,
Expand Down Expand Up @@ -185,7 +161,6 @@ def superv_sent(docs, y, result_queue, ngram_range=(1, 1), analyzer='char_wb',
# 'ExtraTreeClassifier': ExtraTreeClassifier()
}


n = messages[messages.sentiment == 'negative'].shape[0]
messages[messages.sentiment == 'positive'].head(n)
messages = pd.concat([messages[messages.sentiment == 'negative'],
Expand Down Expand Up @@ -218,8 +193,8 @@ def superv_sent(docs, y, result_queue, ngram_range=(1, 1), analyzer='char_wb',
log.info('Add process for {}'.format(param))
p = multiprocessing.Process(target=superv_sent,
args=(
docs, y, result_queue, param['ngram_range'],
param['analyzer'], n_folds))
docs, y, result_queue, param['ngram_range'],
param['analyzer'], n_folds))
p.start()
jobs.append(p)

Expand Down
6 changes: 3 additions & 3 deletions examples/frequentiment_lexicons_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
import pandas as pd
from joblib import Parallel
from joblib import delayed
from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor
from textlytics.sentiment.lexicons import SentimentLexicons
from textlytics.data.lexicons import SentimentLexicons
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.data.sentiment import to_pickle

log = logging.getLogger()
log.setLevel(logging.DEBUG)
Expand Down
2 changes: 1 addition & 1 deletion examples/gensim_vectorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from sklearn.tree import DecisionTreeClassifier
from textlytics.processing.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.data.sentiment import to_pickle
from textlytics.utils import list_to_str

logging.basicConfig(filename='gensim_vectorization.log')
Expand Down
2 changes: 1 addition & 1 deletion examples/lexicons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pandas as pd
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.document_preprocessing import DocumentPreprocessor
from textlytics.preprocessing.text_preprocessing import DocumentPreprocessor

df = pd.read_csv('C:\Users\Dell\Documents\GitHub\word2vec\d2v-vs-bow\Automotive9600.csv')

Expand Down
2 changes: 1 addition & 1 deletion examples/sentiment-pipeline-model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

from textlytics.sentiment.document_preprocessing import DocumentPreprocessor
from textlytics.preprocessing.text_preprocessing import DocumentPreprocessor

logging.basicConfig(filename='train_sent_superv_model.log')
log = logging.getLogger()
Expand Down
4 changes: 2 additions & 2 deletions examples/sentiment_amazon_full_supervised_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.data.sentiment import to_pickle

logging.basicConfig(filename='generate_lexicons_and_results.log')
log = logging.getLogger()
Expand Down
4 changes: 2 additions & 2 deletions examples/sentiment_amazon_supervised_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor
from textlytics.sentiment.sentiment import Sentiment
from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.data.sentiment import to_pickle

log = logging.getLogger()
log.setLevel(logging.DEBUG)
Expand Down
4 changes: 2 additions & 2 deletions examples/sentiment_amazon_supervised_example_cv_fixed.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.data.sentiment import to_pickle

logging.basicConfig(filename='generate_lexicons_and_results.log')
log = logging.getLogger()
Expand Down
4 changes: 2 additions & 2 deletions examples/sentiment_doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.data.sentiment import to_pickle
from textlytics.utils import list_to_str

logging.basicConfig(filename='generate_lexicons_and_results.log')
Expand Down
2 changes: 1 addition & 1 deletion examples/sentiment_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import results_to_pickle
from textlytics.data.sentiment import results_to_pickle

log = logging.getLogger()
log.setLevel(logging.DEBUG)
Expand Down
2 changes: 1 addition & 1 deletion examples/sentiment_kfolds.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
DocumentPreprocessor
from textlytics.processing.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.data.sentiment import to_pickle

logging.basicConfig(filename='generate_lexicons_and_results.log')
log = logging.getLogger()
Expand Down
6 changes: 3 additions & 3 deletions examples/sentiment_lexicons.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
from os.path import join, basename, exists

import pandas as pd
from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor
from textlytics.sentiment.lexicons import SentimentLexicons
from textlytics.data.lexicons import SentimentLexicons
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.data.sentiment import to_pickle

log = logging.getLogger()
log.setLevel(logging.DEBUG)
Expand Down
9 changes: 5 additions & 4 deletions examples/sentiment_lexicons_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
from os import makedirs
from os.path import join, exists

from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor
from textlytics.sentiment.io_sentiment import Dataset
from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.sentiment.lexicons import SentimentLexicons
from textlytics.data.lexicons import SentimentLexicons
from textlytics.sentiment.sentiment import Sentiment

from textlytics.data.sentiment import Dataset
from textlytics.data.sentiment import to_pickle

log = logging.getLogger()
log.setLevel(logging.DEBUG)

Expand Down
2 changes: 1 addition & 1 deletion examples/sentiment_ngrams.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sklearn.svm import LinearSVC
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import results_to_pickle
from textlytics.data.sentiment import results_to_pickle

logging.basicConfig(filename='processing.log', level=logging.DEBUG,
format='%(asctime)s - sentiment_ngrams.py - '
Expand Down
4 changes: 2 additions & 2 deletions examples/sentiment_process_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pprint import pprint

from sklearn.linear_model import LogisticRegression
from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor

__author__ = 'Lukasz Augustyniak'
Expand All @@ -17,7 +17,7 @@
from datetime import datetime

from textlytics.sentiment.sentiment import Sentiment
from textlytics.sentiment.io_sentiment import results_to_pickle
from textlytics.data.sentiment import results_to_pickle

logging.basicConfig(filename='processing.log',
level=logging.DEBUG,
Expand Down
2 changes: 1 addition & 1 deletion examples/sentiment_superv.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sklearn.linear_model import LogisticRegression
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import results_to_pickle
from textlytics.data.sentiment import results_to_pickle

log = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion examples/sentiment_supervised_4_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor

logging.basicConfig(filename='generate_lexicons_and_results.log')
Expand Down
2 changes: 1 addition & 1 deletion examples/sentiment_supervised_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor

logging.basicConfig(filename='generate_lexicons_and_results.log')
Expand Down
4 changes: 2 additions & 2 deletions examples/sentiment_supervised_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from sklearn.svm import LinearSVC
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import Dataset
from textlytics.sentiment.io_sentiment import results_to_pickle
from textlytics.data.sentiment import Dataset
from textlytics.data.sentiment import results_to_pickle

logging.basicConfig(filename='sentiment-supervised-example.log')
log = logging.getLogger()
Expand Down
4 changes: 2 additions & 2 deletions examples/sentiment_supervised_example_imdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from sklearn.svm import LinearSVC
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import Dataset
from textlytics.sentiment.io_sentiment import results_to_pickle
from textlytics.data.sentiment import Dataset
from textlytics.data.sentiment import results_to_pickle

logging.basicConfig(filename='sentiment-supervised-example.log')
log = logging.getLogger()
Expand Down
4 changes: 2 additions & 2 deletions examples/sentiment_word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
import pandas as pd
from gensim.models import Word2Vec
from sklearn.linear_model import LogisticRegression
from textlytics.sentiment.document_preprocessing import \
from textlytics.preprocessing.text_preprocessing import \
DocumentPreprocessor
from textlytics.sentiment.sentiment import Sentiment

from textlytics.sentiment.io_sentiment import to_pickle
from textlytics.data.sentiment import to_pickle
from textlytics.utils import list_to_str

logging.basicConfig(filename='generate_lexicons_and_results.log')
Expand Down
42 changes: 21 additions & 21 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
beautifulsoup4==4.4.1
dill==0.2.5
# todo check if 2.2.0 will work
gensim==2.2.0
# graph_tool==2.11
joblib==0.9.4
line_profiler==2.0
matplotlib==2.0.2
memory_profiler==0.43
networkx==1.11
nltk==3.2.1
numpy==1.12.0
pandas==0.20.1
Pattern==2.6
pyenchant==1.6.8
scikit_learn==0.18.2
scipy==0.19.0
simplejson==3.10.0
spacy==1.7
stemming==1.0.1
xlsxwriter==0.9.6
beautifulsoup4
dill
gensim
html5lib
joblib
keras
line_profiler
matplotlib
memory_profiler
networkx
nltk
numpy
pandas
pyenchant
scikit_learn
scipy
simplejson
spacy
stemming
wordfreq
xlsxwriter
Loading