# Import all required libraries
import warnings
from prettytable import PrettyTable
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import plotly.figure_factory as ff
import altair as alt
alt.renderers.enable('default')
warnings.filterwarnings('ignore')
import_data = pd.read_csv("D:/Datasets/2018-2010_import.csv")
export_data = pd.read_csv("D:/Datasets/2018-2010_export.csv")
styles = [
dict(selector="tr:hover", props=[("background", "#f4f4f4")]),
dict(selector="th",
props=[("color", "#fff"), ("border", "1px solid #eee"),
("padding", "12px 35px"), ("border-collapse", "collapse"),
("background", "#00cccc"), ("font-size", "18px")]),
dict(selector="caption",
props=[
("caption-side", "top"),
("font-size", "25px"),
("border", "1px solid #eee"),
("padding", "12px 35px"),
]),
]
import_data.head().style.set_table_styles(styles).highlight_null(
null_color='#FF8C00').set_caption('Import Data')
HSCode | Commodity | value | country | year | |
---|---|---|---|---|---|
0 | 5 | PRODUCTS OF ANIMAL ORIGIN, NOT ELSEWHERE SPECIFIED OR INCLUDED. | 0.000000 | AFGHANISTAN TIS | 2018 |
1 | 7 | EDIBLE VEGETABLES AND CERTAIN ROOTS AND TUBERS. | 12.380000 | AFGHANISTAN TIS | 2018 |
2 | 8 | EDIBLE FRUIT AND NUTS; PEEL OR CITRUS FRUIT OR MELONS. | 268.600000 | AFGHANISTAN TIS | 2018 |
3 | 9 | COFFEE, TEA, MATE AND SPICES. | 35.480000 | AFGHANISTAN TIS | 2018 |
4 | 11 | PRODUCTS OF THE MILLING INDUSTRY; MALT; STARCHES; INULIN; WHEAT GLUTEN. | nan | AFGHANISTAN TIS | 2018 |
styles = [
dict(selector="tr:hover", props=[("background", "#f4f4f4")]),
dict(selector="th",
props=[("color", "#000000"), ("border", "1px solid #eee"),
("padding", "12px 35px"), ("border-collapse", "collapse"),
("background", "#FF7F50"), ("font-size", "18px")]),
dict(selector="caption",
props=[
("caption-side", "top"),
("font-size", "25px"),
("border", "1px solid #eee"),
("padding", "12px 35px"),
]),
]
import_data.describe().style.set_table_styles(styles).highlight_min(
color='#008B8B').set_caption(
"Short Summary of Numerical Varaibles of Import Data")
HSCode | value | year | |
---|---|---|---|
count | 76124.000000 | 64536.000000 | 76124.000000 |
mean | 53.905023 | 62.361425 | 2014.018299 |
std | 27.546852 | 666.327177 | 2.579385 |
min | 1.000000 | 0.000000 | 2010.000000 |
25% | 30.000000 | 0.030000 | 2012.000000 |
50% | 54.000000 | 0.370000 | 2014.000000 |
75% | 78.000000 | 4.780000 | 2016.000000 |
max | 99.000000 | 32781.570000 | 2018.000000 |
styles = [
dict(selector="tr:hover", props=[("background", "#f4f4f4")]),
dict(selector="th",
props=[("color", "#000000"), ("border", "1px solid #eee"),
("padding", "12px 35px"), ("border-collapse", "collapse"),
("background", "#FFD700"), ("font-size", "18px")]),
dict(selector="caption",
props=[
("caption-side", "top"),
("font-size", "25px"),
("border", "1px solid #eee"),
("padding", "12px 35px"),
]),
]
export_data.describe().style.set_table_styles(styles).highlight_min(
color='#E9967A').set_caption(
"Short Summary of Numerical Varaibles of Export Data")
HSCode | value | year | |
---|---|---|---|
count | 137023.000000 | 122985.000000 | 137023.000000 |
mean | 51.330302 | 21.567829 | 2014.056304 |
std | 28.018026 | 229.701279 | 2.580160 |
min | 1.000000 | 0.000000 | 2010.000000 |
25% | 28.000000 | 0.030000 | 2012.000000 |
50% | 52.000000 | 0.360000 | 2014.000000 |
75% | 74.000000 | 3.770000 | 2016.000000 |
max | 99.000000 | 19805.170000 | 2018.000000 |
# Check Number of Rows where value is zero
print("No of Rows in Import Data where value is zero =",
len(import_data[import_data['value'] == 0]))
print("No of Rows in Export Data where value is zero =",
len(export_data[export_data['value'] == 0]))
No of Rows in Import Data where value is zero = 8596 No of Rows in Export Data where value is zero = 15860
# Drop Missing Values
import_data.dropna(inplace=True)
export_data.dropna(inplace=True)
# Fix some long Commodity names
dic = {
'PRODUCTS OF ANIMAL ORIGIN, NOT ELSEWHERE SPECIFIED OR INCLUDED.':
'ANIMAL PRODUCTS',
'NUCLEAR REACTORS, BOILERS, MACHINERY AND MECHANICAL APPLIANCES; PARTS THEREOF.':
'NUCLEAR EQUIPMENT',
'OPTICAL, PHOTOGRAPHIC CINEMATOGRAPHIC MEASURING, CHECKING PRECISION, MEDICAL OR SURGICAL INST. AND APPARATUS PARTS AND ACCESSORIES THEREOF;':
'OPTICAL INSTRUMENTS',
'FURNITURE; BEDDING, MATTRESSES, MATTRESS SUPPORTS, CUSHIONS AND SIMILAR STUFFED FURNISHING; LAMPS AND LIGHTING FITTINGS NOT ELSEWHERE SPECIFIED OR INC':
'FURNITURE',
'ELECTRICAL MACHINERY AND EQUIPMENT AND PARTS THEREOF; SOUND RECORDERS AND REPRODUCERS, TELEVISION IMAGE AND SOUND RECORDERS AND REPRODUCERS,AND PARTS.':
'ELECTRICAL MACHINERY',
'RAILWAY OR TRAMWAY LOCOMOTIVES, ROLLING-STOCK AND PARTS THEREOF; RAILWAY OR TRAMWAY TRACK FIXTURES AND FITTINGS AND PARTS THEREOF; MECHANICAL':
'RAILWAY MACHINERY',
'TANNING OR DYEING EXTRACTS; TANNINS AND THEIR DERI. DYES, PIGMENTS AND OTHER COLOURING MATTER; PAINTS AND VER; PUTTY AND OTHER MASTICS; INKS.':
'TANNING EXTRACTS',
'PREPARATIONS OF MEAT, OF FISH OR OF CRUSTACEANS, MOLLUSCS OR OTHER AQUATIC INVERTEBRATES':
'PREPARATIONS OF NONVEG FOOD',
'EXPLOSIVES; PYROTECHNIC PRODUCTS; MATCHES; PYROPHORIC ALLOYS; CERTAIN COMBUSTIBLE PREPARATIONS.':
'EXPLOSIVES',
'VEGETABLE PLAITING MATERIALS; VEGETABLE PRODUCTS NOT ELSEWHERE SPECIFIED OR INCLUDED.':
'VEGETABLE PLAITING MATERIALS',
'ARTICLES OF APPAREL AND CLOTHING ACCESSORIES, NOT KNITTED OR CROCHETED.':
'CLOTHING',
'OTHER MADE UP TEXTILE ARTICLES; SETS; WORN CLOTHING AND WORN TEXTILE ARTICLES; RAGS':
'TEXTILES',
'VEHICLES OTHER THAN RAILWAY OR TRAMWAY ROLLING STOCK, AND PARTS AND ACCESSORIES THEREOF.':
'SPARE PARTS'
}
import_data['Commodity'] = import_data['Commodity'].replace(dic)
export_data['Commodity'] = export_data['Commodity'].replace(dic)
# Fixing Some Country Names
import_data.country = import_data.country.replace({
'U K': 'UK',
'U S A': 'USA',
'AFGHANISTAN TIS': 'AFGHANISTAN',
'BAHARAIN IS': 'BAHARAIN',
'BANGLADESH PR': 'BANGLADESH',
'BOSNIA-HRZGOVIN': 'BOSNIA',
'BURKINA FASO': 'BURKINA',
'CHINA P RP': 'CHINA'
})
export_data.country = export_data.country.replace({
'U K': 'UK',
'U S A': 'USA',
'AFGHANISTAN TIS': 'AFGHANISTAN',
'BAHARAIN IS': 'BAHARAIN',
'BANGLADESH PR': 'BANGLADESH',
'BOSNIA-HRZGOVIN': 'BOSNIA',
'BURKINA FASO': 'BURKINA',
'CHINA P RP': 'CHINA'
})
# Univariate Analysis of HSCode
if len(
set(list(import_data.HSCode.unique())) -
set(list(export_data.HSCode.unique()))) == 0:
print(
"HSCode Values for Import Data and Export Data are different which should happen."
)
else:
print(
"Some HSCode Values are used for both Import and Export which should not happen"
)
HSCode Values for Import Data and Export Data are different which should happen.
# Univariate Analysis of Commodities
unique_import_commodities = import_data.Commodity.unique().tolist()
unique_export_commodities = export_data.Commodity.unique().tolist()
print("No of Unique Commodities Imported from 2010 to 2018 =",
len(unique_import_commodities))
print("No of Unique Commodities Exported from 2010 to 2018 =",
len(unique_export_commodities))
import_commodities = import_data.Commodity.unique()
import_commodities_names = list(
import_data.Commodity.value_counts().head(8).index)
import_commodities_values = list(
import_data.Commodity.value_counts().head(8).values)
export_commodities_names = list(
export_data.Commodity.value_counts().head(8).index)
export_commodities_values = list(
export_data.Commodity.value_counts().head(8).values)
table = PrettyTable([' Top 10 Import Commodity', 'Count'])
for name, value in zip(import_commodities_names, import_commodities_values):
table.add_row([name, value])
print(table)
table = PrettyTable([' Top 10 Export Commodity', 'Count'])
for name, value in zip(export_commodities_names, export_commodities_values):
table.add_row([name, value])
print(table)
No of Unique Commodities Imported from 2010 to 2018 = 98 No of Unique Commodities Exported from 2010 to 2018 = 98 +---------------------------------+-------+ | Top 10 Import Commodity | Count | +---------------------------------+-------+ | ELECTRICAL MACHINERY | 1576 | | NUCLEAR EQUIPMENT | 1504 | | IRON AND STEEL | 1378 | | PLASTIC AND ARTICLES THEREOF. | 1295 | | ALUMINIUM AND ARTICLES THEREOF. | 1289 | | OPTICAL INSTRUMENTS | 1203 | | COPPER AND ARTICLES THEREOF. | 1117 | | ARTICLES OF IRON OR STEEL | 1099 | +---------------------------------+-------+ +-------------------------------+-------+ | Top 10 Export Commodity | Count | +-------------------------------+-------+ | NUCLEAR EQUIPMENT | 1882 | | ELECTRICAL MACHINERY | 1878 | | PHARMACEUTICAL PRODUCTS | 1876 | | OPTICAL INSTRUMENTS | 1849 | | CLOTHING | 1817 | | PLASTIC AND ARTICLES THEREOF. | 1809 | | ARTICLES OF IRON OR STEEL | 1797 | | TEXTILES | 1791 | +-------------------------------+-------+
# Univaraite Analysis of Country
import_countries = import_data.country.unique()
export_countries = export_data.country.unique()
print("No of Countries from which Commodities are Imported =",
len(import_countries))
print("No of Countries to which Commodities are Exported =",
len(export_countries))
import_countries_names = list(
import_data.country.value_counts().head(8).index) + ['Rest All']
import_countries_values = list(
import_data.country.value_counts().head(8).values) + [
len(import_data) - import_data.country.value_counts().head(8).sum()
]
export_countries_names = list(
export_data.country.value_counts().head(8).index) + ['Rest All']
export_countries_values = list(
export_data.country.value_counts().head(8).values) + [
len(export_data) - export_data.country.value_counts().head(8).sum()
]
fig = make_subplots(rows=1,
cols=2,
specs=[[{
'type': 'domain'
}, {
'type': 'domain'
}]])
fig.add_trace(
go.Pie(hoverinfo="label+percent+name",
labels=import_countries_names,
values=import_countries_values,
hole=0.3,
rotation=120), 1, 1)
fig.add_trace(
go.Pie(hoverinfo="label+percent+name",
labels=export_countries_names,
values=export_countries_values,
hole=0.3,
rotation=180), 1, 2)
fig.update_layout(
title_text='Side by Side Donut Chart of Top 8 Import and Export Countries',
template='plotly_dark',
annotations=[
dict(x=0.18, y=0.5, text='Import', font_size=20, showarrow=False),
dict(x=0.82, y=0.5, text='Export', font_size=20, showarrow=False)
])
fig.show()
No of Countries from which Commodities are Imported = 241 No of Countries to which Commodities are Exported = 248