{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#installing the li
aries"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: geoplotlib in c:\\users\\dell\\anaconda3\\lib\\site-packages (0.3.2)\n"
]
}
],
"source": [
"!pip install geoplotlib"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pyglet in c:\\users\\dell\\anaconda3\\lib\\site-packages (1.5.5)\n"
]
}
],
"source": [
"!pip install pyglet"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import geoplotlib"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"#to display the maps in the jupyter notebook\n",
"from IPython.display import Image"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"#reading the data"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stde
",
"output_type": "stream",
"text": [
"C:\\Users\\DELL\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" interactivity=interactivity, compiler=compiler, result=result)\n"
]
}
],
"source": [
"#data is saved at the same location as the cu
ent file location\n",
"\n",
"df = pd.read_csv(\"world_cities_pop.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#because we have mixed datatypes , so we force \n",
"#python to consider it as a character because of the mixed data types\n",
"#reading data again with modifications\n",
"df = pd.read_csv(\"world_cities_pop.csv\" , dtype = {'Region' : np.str})"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a
ay(['06', '07', '04', '05', '02', '03', '08', '01', '29', '10', '24',\n",
" '09', '35', '42', '11', '27', '39', '28', '26', '17', '41', '33',\n",
" '30', '13', '40', '18', '23', '19', '37', '14', '32', '36', '31',\n",
" '34', '38', nan, '00', '51', '46', '49', '43', '47', '44', '45',\n",
" '50', '48', '15', '12', '20', '16', '21', '22', '62', '68', '65',\n",
" '64', '66', '58', '60', '61', '71', '57'], dtype=object)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.unique(df['Region'])[:62]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(3173958, 7)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#size of the orignal dataset\n",
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Country object\n",
"City object\n",
"AccentCity object\n",
"Region object\n",
"Population float64\n",
"Latitude float64\n",
"Longitude float64\n",
"dtype: object"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#checking the data types\n",
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"