import seaborn as sns

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Show every column when a DataFrame is displayed instead of eliding the middle ones.
pd.options.display.max_columns = None

import warnings
# Suppress all warnings for the remainder of the session.
warnings.filterwarnings(action="ignore")


# Load the crypto price table from the local CSV file.
df = pd.read_csv(filepath_or_buffer='C:/Users/Sanchit/Downloads/crypto prices.csv')

# Display the whole frame, then only its first five rows.
df


df.head(n=5)


# Check how many rows and columns there are
df.shape

(1730, 10)


# Use the info() method to summarise the columns of the data (dtypes and
# non-null counts).
# It has to be *called*: a bare `df.info` only evaluates to the bound method,
# whose repr is the whole frame wrapped in "<bound method ...>", and the
# summary itself is never produced.
df.info()

<bound method DataFrame.info of       Unnamed: 0        Date  Adj Close (BNB)  Volume (BNB)  Adj Close (BTC)  \
0              0   11/9/2017         1.990770      19192200      7143.580078   
1              1  11/10/2017         1.796840      11155000      6618.140137   
2              2  11/11/2017         1.670470       8178150      6357.600098   
3              3  11/12/2017         1.519690      15298700      5950.069824   
4              4  11/13/2017         1.686620      12238800      6559.490234   
...          ...         ...              ...           ...              ...   
1725        1725   7/31/2022       283.579468    1313531523     23336.896484   
1726        1726    8/1/2022       283.539490    1314157614     23314.199219   
1727        1727    8/2/2022       283.820984    1768344106     22978.117188   
1728        1728    8/3/2022       298.356781    2133584480     22846.507813   
1729        1729    8/4/2022       310.706055    1926587001     22858.423828   

      Volume (BTC)  Adj Close (USDT)  Volume (USDT)  Adj Close (ETH)  \
0       3226249984          1.008180      358188000       320.884003   
1       5208249856          1.006010      756446016       299.252991   
2       4908680192          1.008990      746227968       314.681000   
3       8957349888          1.012470     1466060032       307.907990   
4       6263249920          1.009350      767884032       316.716003   
...            ...               ...            ...              ...   
1725   23553591896          1.000328    52267348020      1681.517334   
1726   25849159141          1.000204    50882756969      1635.195801   
1727   28389250717          1.000159    54793315279      1632.945435   
1728   26288169966          1.000204    47717439471      1618.874512   
1729   24817580032          1.000133    44526180493      1608.205811   

      Volume (ETH)  
0        893249984  
1        885985984  
2        842300992  
3       1613479936  
4       1041889984  
...            ...  
1725   14200735370  
1726   16191371176  
1727   20426082309  
1728   16786218830  
1729   14467440626  

[1730 rows x 10 columns]>


# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()


# Number of missing (NaN) entries in each column.
df.isnull().sum()

Unnamed: 0          0
Date                0
Adj Close (BNB)     0
Volume (BNB)        0
Adj Close (BTC)     0
Volume (BTC)        0
Adj Close (USDT)    0
Volume (USDT)       0
Adj Close (ETH)     0
Volume (ETH)        0
dtype: int64


# Overview: one line per column of the frame, plotted against the row index.
plt.figure(figsize=(25, 5))
sns.set_style('dark')
# The frame loaded from the CSV is named `df`; no variable called `data` is
# defined anywhere in this file, so the original call raised NameError.
sns.lineplot(data=df)

<AxesSubplot:>


# Adjusted closing price of every coin on one shared axis, one labelled line each.
plt.figure(figsize=(20, 10))
sns.set_style('dark')
# Read from `df` (the loaded frame); `data` is not defined in this file.
for coin in ('BNB', 'ETH', 'BTC', 'USDT'):
    sns.lineplot(data=df[f'Adj Close ({coin})'], label=coin)
# "Adj Close" abbreviates *adjusted* close, not "adjacent".
plt.title('Adjusted Close Price')

Text(0.5, 1.0, 'Adjacent Close Price')


# Traded volume of every coin on one shared axis, one labelled line each.
plt.figure(figsize=(20, 10))
sns.set_style('dark')
# Read from `df` (the loaded frame); `data` is not defined in this file.
for coin in ('BNB', 'ETH', 'BTC', 'USDT'):
    sns.lineplot(data=df[f'Volume ({coin})'], label=coin)
plt.title('Volume')

Text(0.5, 1.0, 'Volume')


# One histogram per numeric column, placed on a 4-row by 8-column grid of axes.
df.hist(layout=(4, 8), figsize=(20, 8))

array([[<AxesSubplot:title={'center':'Unnamed: 0'}>,
        <AxesSubplot:title={'center':'Adj Close (BNB)'}>,
        <AxesSubplot:title={'center':'Volume (BNB)'}>,
        <AxesSubplot:title={'center':'Adj Close (BTC)'}>,
        <AxesSubplot:title={'center':'Volume (BTC)'}>,
        <AxesSubplot:title={'center':'Adj Close (USDT)'}>,
        <AxesSubplot:title={'center':'Volume (USDT)'}>,
        <AxesSubplot:title={'center':'Adj Close (ETH)'}>],
       [<AxesSubplot:title={'center':'Volume (ETH)'}>, <AxesSubplot:>,
        <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
        <AxesSubplot:>, <AxesSubplot:>],
       [<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
        <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>],
       [<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
        <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>]],
      dtype=object)


# Correlation is only defined for numeric columns. The frame also carries the
# string `Date` column, which makes a plain `df.corr()` raise on pandas >= 2.0,
# so restrict to the numeric columns explicitly (works on every pandas version).
numeric_df = df.select_dtypes(include='number')
numeric_df.corr()


# Annotated heat map of the same correlation matrix.
plt.figure(figsize=(18, 16))
sns.heatmap(numeric_df.corr(), annot=True, cmap=plt.cm.CMRmap_r);


# Pairwise scatter plots on a random sample of 100 rows to keep the grid fast.
# Uses `df`; `data` is not defined in this file.
sns.pairplot(df.sample(n=100));


# Features: adjusted close of BNB, USDT and ETH. Target: adjusted close of BTC.
feature_columns = ['Adj Close (BNB)', 'Adj Close (USDT)', 'Adj Close (ETH)']
X = df[feature_columns]
Y = df['Adj Close (BTC)']


# Peek at the first rows of the feature matrix ...
X.head(n=5)


# ... and of the target series.
Y.head(n=5)

0    7143.580078
1    6618.140137
2    6357.600098
3    5950.069824
4    6559.490234
Name: Adj Close (BTC), dtype: float64


# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=2,
)


from sklearn.preprocessing import MinMaxScaler
# Learn the min/max scaling on the training rows only, then apply that same
# scaling to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


from sklearn.neighbors import KNeighborsRegressor
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor


# k-nearest-neighbours regressor using the 2 closest training points.
neigh = KNeighborsRegressor(n_neighbors=2)
neigh.fit(X=X_train, y=Y_train)

KNeighborsRegressor(n_neighbors=2)


# Random forest regressor with library-default hyperparameters.
rf = RandomForestRegressor()
rf.fit(X=X_train, y=Y_train)

RandomForestRegressor()


# Single decision-tree regressor with library-default hyperparameters.
dt = tree.DecisionTreeRegressor()
dt.fit(X=X_train, y=Y_train)

DecisionTreeRegressor()


# Gradient-boosting regressor with library-default hyperparameters.
est = GradientBoostingRegressor()
est.fit(X=X_train, y=Y_train)

GradientBoostingRegressor()


# Predict the held-out rows with each fitted model ...
Y_pred_neigh = neigh.predict(X_test)
Y_pred_rf = rf.predict(X_test)
Y_pred_dt = dt.predict(X_test)
Y_pred_est = est.predict(X_test)

# ... then score each set of predictions with R^2 against the true targets.
r2neigh = metrics.r2_score(y_true=Y_test, y_pred=Y_pred_neigh)
r2rf = metrics.r2_score(y_true=Y_test, y_pred=Y_pred_rf)
r2dt = metrics.r2_score(y_true=Y_test, y_pred=Y_pred_dt)
r2est = metrics.r2_score(y_true=Y_test, y_pred=Y_pred_est)


# Print each model's test score between two dashed rules.
# NOTE: the values shown under the "Accuracy" banner are R^2 scores.
rule = "-" * 30
stars = "*" * 10
print(stars, "Accuracy", stars)

for label, score in (
    ("K nearest neighbors: ", r2neigh),
    ("random forest: ", r2rf),
    ("decision tree: ", r2dt),
    ("gradient boosting: ", r2est),
):
    print(rule)
    print(label, score)
    print(rule)

********** Accuracy **********
------------------------------
K nearest neighbors:  0.9632261125895967
------------------------------
------------------------------
random forest:  0.9683946505295005
------------------------------
------------------------------
decision tree:  0.9399728872719846
------------------------------
------------------------------
gradient boosting:  0.9617905997973549
------------------------------


from sklearn.model_selection import RandomizedSearchCV
# Candidate hyperparameter values for a randomized search over a random forest
# regressor. Each key is a RandomForestRegressor constructor argument and each
# value is the list of settings to sample from.

# Number of trees: 10, 20, ..., 1000. `range` yields exact ints, so there is no
# float-to-int truncation as there was with np.linspace.
n_estimators = list(range(10, 1001, 10))
# The 'auto' option was removed from scikit-learn (deprecated 1.1, removed 1.3).
# For a regressor forest it meant "consider every feature", which the float 1.0
# expresses on old and new versions alike.
max_features = [1.0, 'sqrt', 'log2']
# Maximum tree depth: 10, 20, ..., 200, plus None for unlimited depth.
max_depth = list(range(10, 201, 10))
max_depth.append(None)
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}
print(random_grid)

{'n_estimators': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450, 460, 470, 480, 490, 500, 510, 520, 530, 540, 550, 560, 570, 580, 590, 600, 610, 620, 630, 640, 650, 660, 670, 680, 690, 700, 710, 720, 730, 740, 750, 760, 770, 780, 790, 800, 810, 820, 830, 840, 850, 860, 870, 880, 890, 900, 910, 920, 930, 940, 950, 960, 970, 980, 990, 1000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4], 'bootstrap': [True, False]}


# Run the randomized search over `random_grid`. No search object is created
# anywhere else in this file, so without this step `rf_random` is undefined and
# the refit below raises NameError.
# NOTE(review): n_iter / cv / random_state are chosen here, since the original
# search settings are not present in the file; adjust to taste.
rf_random = RandomizedSearchCV(
    estimator=RandomForestRegressor(),
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    random_state=42,
    n_jobs=-1,
)
rf_random.fit(X_train, Y_train)

# Refit a fresh forest with the best sampled hyperparameters on the training split.
rf = RandomForestRegressor(**rf_random.best_params_)
rf.fit(X_train, Y_train)

# Score the tuned model on the held-out rows (the printed value is an R^2 score).
Y_pred_rf = rf.predict(X_test)
r2rf = metrics.r2_score(Y_test, Y_pred_rf)

print("-"*30)
print("Accuracy: ", r2rf)
print("-"*30)

------------------------------
Accuracy:  0.968881458729958
------------------------------

	Unnamed: 0	Adj Close (BNB)	Volume (BNB)	Adj Close (BTC)	Volume (BTC)	Adj Close (USDT)	Volume (USDT)	Adj Close (ETH)	Volume (ETH)
count	1730.0000	1730.000000	1.730000e+03	1730.000000	1.730000e+03	1730.000000	1.730000e+03	1730.000000	1.730000e+03
mean	864.5000	134.960777	9.647885e+08	20191.519348	2.570080e+10	1.001717	4.085393e+10	1105.603148	1.288480e+10
std	499.5523	185.725539	1.486405e+09	17507.045641	2.003526e+10	0.005928	3.912941e+10	1233.514214	1.104728e+10
min	0.0000	1.510360	9.284000e+03	3236.761719	2.923670e+09	0.966644	3.581880e+08	84.308296	6.217330e+08
25%	432.2500	13.670646	1.177502e+08	7457.858887	9.718123e+09	0.999968	4.705288e+09	203.758255	3.844413e+09
50%	864.5000	22.234484	3.079566e+08	10330.514649	2.313310e+10	1.000601	3.348729e+10	436.047501	1.047600e+10
75%	1296.7500	288.047844	1.467549e+09	35538.384766	3.518178e+10	1.002838	6.087353e+10	1828.478180	1.823330e+10
max	1729.0000	675.684082	1.798295e+10	67566.828125	3.509679e+11	1.077880	2.790675e+11	4812.087402	8.448291e+10

	Unnamed: 0	Adj Close (BNB)	Volume (BNB)	Adj Close (BTC)	Volume (BTC)	Adj Close (USDT)	Volume (USDT)	Adj Close (ETH)	Volume (ETH)
Unnamed: 0	1.000000	0.761237	0.571730	0.743903	0.586429	-0.181445	0.722177	0.697317	0.674547
Adj Close (BNB)	0.761237	1.000000	0.732715	0.918562	0.407226	-0.154652	0.659554	0.962406	0.569978
Volume (BNB)	0.571730	0.732715	1.000000	0.768149	0.600409	-0.117310	0.773426	0.654506	0.699198
Adj Close (BTC)	0.743903	0.918562	0.768149	1.000000	0.573059	-0.151540	0.773313	0.926972	0.672594
Volume (BTC)	0.586429	0.407226	0.600409	0.573059	1.000000	-0.095888	0.867511	0.383787	0.859965
Adj Close (USDT)	-0.181445	-0.154652	-0.117310	-0.151540	-0.095888	1.000000	-0.139902	-0.149685	-0.119487
Volume (USDT)	0.722177	0.659554	0.773426	0.773313	0.867511	-0.139902	1.000000	0.629372	0.948139
Adj Close (ETH)	0.697317	0.962406	0.654506	0.926972	0.383787	-0.149685	0.629372	1.000000	0.545473
Volume (ETH)	0.674547	0.569978	0.699198	0.672594	0.859965	-0.119487	0.948139	0.545473	1.000000

Performing Exploratory Data Analysis on the Dataset

Checking whether there are any null (NaN) values in our dataset

Visualization Analysis

Now let's check the correlation between the variables

Data Pre-Processing for Predictions

Model Evaluation

Hyperparameter Tuning

Model Evaluation Performance

	Unnamed: 0	Date	Adj Close (BNB)	Volume (BNB)	Adj Close (BTC)	Volume (BTC)	Adj Close (USDT)	Volume (USDT)	Adj Close (ETH)	Volume (ETH)
0	0	11/9/2017	1.990770	19192200	7143.580078	3226249984	1.008180	358188000	320.884003	893249984
1	1	11/10/2017	1.796840	11155000	6618.140137	5208249856	1.006010	756446016	299.252991	885985984
2	2	11/11/2017	1.670470	8178150	6357.600098	4908680192	1.008990	746227968	314.681000	842300992
3	3	11/12/2017	1.519690	15298700	5950.069824	8957349888	1.012470	1466060032	307.907990	1613479936
4	4	11/13/2017	1.686620	12238800	6559.490234	6263249920	1.009350	767884032	316.716003	1041889984
...	...	...	...	...	...	...	...	...	...	...
1725	1725	7/31/2022	283.579468	1313531523	23336.896484	23553591896	1.000328	52267348020	1681.517334	14200735370
1726	1726	8/1/2022	283.539490	1314157614	23314.199219	25849159141	1.000204	50882756969	1635.195801	16191371176
1727	1727	8/2/2022	283.820984	1768344106	22978.117188	28389250717	1.000159	54793315279	1632.945435	20426082309
1728	1728	8/3/2022	298.356781	2133584480	22846.507813	26288169966	1.000204	47717439471	1618.874512	16786218830
1729	1729	8/4/2022	310.706055	1926587001	22858.423828	24817580032	1.000133	44526180493	1608.205811	14467440626

	Unnamed: 0	Date	Adj Close (BNB)	Volume (BNB)	Adj Close (BTC)	Volume (BTC)	Adj Close (USDT)	Volume (USDT)	Adj Close (ETH)	Volume (ETH)
0	0	11/9/2017	1.99077	19192200	7143.580078	3226249984	1.00818	358188000	320.884003	893249984
1	1	11/10/2017	1.79684	11155000	6618.140137	5208249856	1.00601	756446016	299.252991	885985984
2	2	11/11/2017	1.67047	8178150	6357.600098	4908680192	1.00899	746227968	314.681000	842300992
3	3	11/12/2017	1.51969	15298700	5950.069824	8957349888	1.01247	1466060032	307.907990	1613479936
4	4	11/13/2017	1.68662	12238800	6559.490234	6263249920	1.00935	767884032	316.716003	1041889984