Merge pull request #1 from husnainfareed/master

cclauss · web-flow · commit 47ad34403327 · 2019-10-20T18:00:25.000+02:00
binary classification using neural network
diff --git a/neural_network/02-imdb-binary-classification.ipynb b/neural_network/02-imdb-binary-classification.ipynb
@@ -0,0 +1,279 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\Hussnain\\Anaconda3\\envs\\tensorflow\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
+      "  from ._conv import register_converters as _register_converters\n",
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Imports\n",
+    "from keras.datasets import imdb\n",
+    "\n",
+    "from keras import models\n",
+    "from keras import layers\n",
+    "from keras import optimizers\n",
+    "from keras import losses\n",
+    "from keras import metrics,activations\n",
+    "\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz\n",
+      " 1048576/17464789 [>.............................] - ETA: 53:49"
+     ]
+    }
+   ],
+   "source": [
+    "#Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz\n",
+    "# num_words=10000 matches the 10000-wide input used by the encoder and the first Dense layer.\n",
+    "train_split, test_split = imdb.load_data(num_words=10000)\n",
+    "xtrain, ytrain = train_split\n",
+    "xtest, ytest = test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Exploring the dataset\n",
+    "\n",
+    "# Array shapes of the training split\n",
+    "print('xtrain shape', xtrain.shape)\n",
+    "print('ytrain shape', ytrain.shape)\n",
+    "print()\n",
+    "# Array shapes of the test split\n",
+    "print('xtest shape', xtest.shape)\n",
+    "print('ytest shape', ytest.shape)\n",
+    "print()\n",
+    "# Show the review and the label of the same sample (index 0), so the pair is meaningful.\n",
+    "print('xtrain first review as dictionary index', xtrain[0])\n",
+    "print()\n",
+    "print()\n",
+    "print('ytrain label', ytrain[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# word -> index mapping: keys are words, values are their integer ids (inverted in the next cell)\n",
+    "word_index = imdb.get_word_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Invert word -> index into index -> word so integer ids can be turned back into text.\n",
+    "reverse_word_index = {index: word for word, index in word_index.items()}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Review ids are shifted by 3 relative to word_index (ids 0, 1, 2 are reserved markers:\n",
+    "# padding, start-of-sequence, unknown), so look up i - 3. Reserved ids have no word and are\n",
+    "# shown as '?' rather than falling back to an unrelated vocabulary entry (or None).\n",
+    "decode_review = ' '.join(reverse_word_index.get(i - 3, '?') for i in xtrain[22])\n",
+    "decode_review"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "def vectorize_sequences(sequences, dimension=10000):\n",
+    "    \"\"\"Multi-hot encode integer sequences.\n",
+    "\n",
+    "    Returns a (len(sequences), dimension) array of zeros in which row r holds 1.0\n",
+    "    at every index that occurs in sequences[r].\n",
+    "    \"\"\"\n",
+    "    encoded = np.zeros((len(sequences), dimension))\n",
+    "    for row, indices in enumerate(sequences):\n",
+    "        # Fancy indexing sets all listed columns at once; repeated indices are harmless.\n",
+    "        encoded[row, indices] = 1.0\n",
+    "    return encoded\n",
+    "\n",
+    "# Encode both splits with the same helper (default dimension of 10000).\n",
+    "x_train, x_test = map(vectorize_sequences, (xtrain, xtest))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cast both label vectors to float32 arrays.\n",
+    "ytrain, ytest = (np.asarray(labels).astype('float32') for labels in (ytrain, ytest))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#model\n",
+    "# 10000-wide input -> two 16-unit ReLU layers -> a single sigmoid output unit\n",
+    "model = models.Sequential([\n",
+    "    layers.Dense(16, activation=activations.relu, input_shape=(10000,)),\n",
+    "    layers.Dense(16, activation=activations.relu),\n",
+    "    layers.Dense(1, activation=activations.sigmoid),\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Binary cross-entropy is the loss that matches a single sigmoid output trained on binary labels;\n",
+    "# mean squared error also trains, but it is not the natural objective for a probability output.\n",
+    "model.compile(\n",
+    "    optimizer=optimizers.RMSprop(lr=0.0001),\n",
+    "    loss=losses.binary_crossentropy,\n",
+    "    metrics=['acc'],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hold out the first 10000 encoded training samples for validation; fit on the remainder.\n",
+    "x_val, x_train_partial = x_train[:10000], x_train[10000:]\n",
+    "y_val, y_train_partial = ytrain[:10000], ytrain[10000:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train for 4 epochs in batches of 512, passing the held-out split as validation data.\n",
+    "history = model.fit(\n",
+    "    x_train_partial,\n",
+    "    y_train_partial,\n",
+    "    epochs=4,\n",
+    "    batch_size=512,\n",
+    "    validation_data=(x_val, y_val),\n",
+    ")\n",
+    "# Metric history recorded by fit(); reused by the plotting cells.\n",
+    "history_dict = history.history\n",
+    "# Last recorded training and validation accuracy\n",
+    "print(history_dict['acc'][-1])\n",
+    "print(history_dict['val_acc'][-1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The 22:23 slice (rather than index 22) keeps a leading batch axis of length 1 for predict().\n",
+    "# NOTE(review): row 22 of x_train_partial is xtrain[10022], not the xtrain[22] review decoded\n",
+    "# earlier - confirm which sample was intended.\n",
+    "sample = x_train_partial[22:23]\n",
+    "print(model.predict(sample))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loss = history_dict['loss']\n",
+    "val_loss = history_dict['val_loss']\n",
+    "# One x-value per recorded epoch, numbered from 1, so it has the same length as the loss lists.\n",
+    "epochs = range(1, len(loss) + 1)\n",
+    "epochs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Render figures inside the notebook; a bare %matplotlib would switch to a separate GUI backend.\n",
+    "%matplotlib inline\n",
+    "acc = history_dict['acc']\n",
+    "val_acc = history_dict['val_acc']\n",
+    "loss = history_dict['loss']\n",
+    "val_loss = history_dict['val_loss']\n",
+    "\n",
+    "# Epoch numbers start at 1 and match the length of the metric lists.\n",
+    "epochs = range(1, len(acc) + 1)\n",
+    "\n",
+    "# 'ro' draws the training loss as red dots\n",
+    "plt.plot(epochs, loss, 'ro', label='Training loss')\n",
+    "# 'b' draws the validation loss as a solid blue line\n",
+    "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n",
+    "plt.title('Training and validation loss')\n",
+    "plt.xlabel('Epochs')\n",
+    "plt.ylabel('Loss')\n",
+    "plt.legend()\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.clf()  # clear the current figure before drawing the accuracy curves\n",
+    "acc_values = history_dict['acc']\n",
+    "val_acc_values = history_dict['val_acc']\n",
+    "# Computed here so this cell only depends on history_dict, not on the loss-plot cell.\n",
+    "epochs = range(1, len(acc_values) + 1)\n",
+    "\n",
+    "# Training accuracy as blue dots, validation accuracy as a solid blue line\n",
+    "plt.plot(epochs, acc_values, 'bo', label='Training acc')\n",
+    "plt.plot(epochs, val_acc_values, 'b', label='Validation acc')\n",
+    "plt.title('Training and validation accuracy')\n",
+    "plt.xlabel('Epochs')\n",
+    "plt.ylabel('Accuracy')\n",
+    "plt.legend()\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}