Categorization Interpretation Example¶

A visual interpretation of the binary categorization outcome for a single document, obtained by looking at the relative contribution of individual words.

from __future__ import print_function

import os
from sklearn.datasets import fetch_20newsgroups
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib as mpl
mpl.use('Agg')

import matplotlib.pyplot as plt

from freediscovery.categorization import binary_sensitivity_analysis
from freediscovery.interpretation import explain_categorization, _make_cmap


# --- Load the data --------------------------------------------------------
# Training subset restricted to two categories; headers, footers and quotes
# are requested to be removed by the loader.
dataset = fetch_20newsgroups(
    subset='train',
    categories=['sci.space', 'comp.graphics'],
    remove=('headers', 'footers', 'quotes'),
)
class_names = dataset.target_names

# Index of the document whose categorization we want to visualize.
doc_index = 312
doc_text = dataset.data[doc_index]

# --- Vectorize and fit a binary classifier --------------------------------
tfidf = TfidfVectorizer(stop_words='english')
features = tfidf.fit_transform(dataset.data)
doc_features = features[doc_index]  # feature row of the selected document

model = LogisticRegression()
model.fit(features, dataset.target)

# The template is formatted twice: first with the two category names, then
# (through the escaped ``{{...}}`` fields) with the predicted probabilities.
proba_template = 'Predicted: {0}: {{0:.2f}}, {1}: {{1:.2f}}'.format(*class_names)
doc_proba = model.predict_proba(doc_features)[0]
print(proba_template.format(*doc_proba))
print('Actual label :', class_names[dataset.target[doc_index]])

# --- Per-word contributions for the selected document ---------------------
word_weights = binary_sensitivity_analysis(model, tfidf.vocabulary_, doc_features)

colormap = _make_cmap(alpha=0.2, filter_ratio=0.15)
html, norm = explain_categorization(word_weights, doc_text, colormap)

# --- Standalone horizontal colorbar acting as the legend ------------------
fig, ax = plt.subplots(1, 1, figsize=(6, 1.2))
plt.subplots_adjust(bottom=0.4, top=0.7)
legend_bar = mpl.colorbar.ColorbarBase(
    ax,
    cmap=colormap,
    norm=norm,
    orientation='horizontal',
)
legend_bar.set_label('{} < ----- > {}'.format(*class_names))
ax.set_title('Relative word weights', fontsize=12)

# --- Export the HTML so that sphinx gallery can display it ----------------
# The file is only written when the documentation examples directory exists
# relative to the current working directory; otherwise this step is skipped.
out_dir = os.path.join('..', '..', 'doc', 'python', 'examples')
print(os.path.abspath(out_dir))
if os.path.exists(out_dir):
    out_path = os.path.join(out_dir, 'out.html')
    # NOTE(review): no explicit encoding is given, so the platform default
    # applies -- confirm this is fine if the document has non-ASCII text.
    with open(out_path, 'wt') as fh:
        fh.write(html)
../../_images/sphx_glr_categorization_interpretation_001.png

Out:

Predicted: comp.graphics: 0.39, sci.space: 0.61
Actual label : sci.space
/home/ubuntu/FreeDiscovery/doc/python/examples
Can anyone tell me where I might find stereo images of planetary and
planetary satellite surfaces? GIFs preferred, but any will do. I'm
especially interested in stereos of the surfaces of Phobos, Deimos, Mars
and the Moon (in that order).
Thanks.

Total running time of the script: ( 0 minutes 11.147 seconds)

Generated by Sphinx-Gallery