<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta content="width=device-width, initial-scale=1.0" name="viewport">
<title>Decision Tree</title>
<meta content="" name="description">
<meta content="" name="keywords">
<!-- Favicons -->
<link href="assets/img/Favicon-1.png" rel="icon">
<link href="assets/img/Favicon-1.png" rel="apple-touch-icon">
<!-- Google Fonts -->
<link href="https://fonts.googleapis.com/css?family=Open+Sans:300,300i,400,400i,600,600i,700,700i|Raleway:300,300i,400,400i,500,500i,600,600i,700,700i|Poppins:300,300i,400,400i,500,500i,600,600i,700,700i" rel="stylesheet">
<!-- Vendor CSS Files -->
<link href="assets/vendor/aos/aos.css" rel="stylesheet">
<link href="assets/vendor/bootstrap/css/bootstrap.min.css" rel="stylesheet">
<link href="assets/vendor/bootstrap-icons/bootstrap-icons.css" rel="stylesheet">
<link href="assets/vendor/boxicons/css/boxicons.min.css" rel="stylesheet">
<link href="assets/vendor/glightbox/css/glightbox.min.css" rel="stylesheet">
<link href="assets/vendor/swiper/swiper-bundle.min.css" rel="stylesheet">
<!-- Creating a python code section-->
<link rel="stylesheet" href="assets/css/prism.css">
<script src="assets/js/prism.js"></script>
<!-- Template Main CSS File -->
<link href="assets/css/style.css" rel="stylesheet">
<!-- To set the icon, visit https://fontawesome.com/account-->
<script src="https://kit.fontawesome.com/5d25c1efd3.js" crossorigin="anonymous"></script>
<!-- end of icon-->
<script type="text/javascript" async
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML">
</script>
<!-- =======================================================
* Template Name: iPortfolio
* Updated: Sep 18 2023 with Bootstrap v5.3.2
* Template URL: https://bootstrapmade.com/iportfolio-bootstrap-portfolio-websites-template/
* Author: BootstrapMade.com
* License: https://bootstrapmade.com/license/
======================================================== -->
</head>
<body>
<!-- ======= Mobile nav toggle button ======= -->
<i class="bi bi-list mobile-nav-toggle d-xl-none"></i>
<!-- ======= Header ======= -->
<header id="header">
<div class="d-flex flex-column">
<div class="profile">
<img src="assets/img/myphoto.jpeg" alt="" class="img-fluid rounded-circle">
<h1 class="text-light"><a href="index.html">Arun</a></h1>
<div class="social-links mt-3 text-center">
<a href="https://www.linkedin.com/in/arunp77/" target="_blank" class="linkedin"><i class="bx bxl-linkedin"></i></a>
<a href="https://github.com/arunp77" target="_blank" class="github"><i class="bx bxl-github"></i></a>
<a href="https://twitter.com/arunp77_" target="_blank" class="twitter"><i class="bx bxl-twitter"></i></a>
<a href="https://www.instagram.com/arunp77/" target="_blank" class="instagram"><i class="bx bxl-instagram"></i></a>
<a href="https://arunp77.medium.com/" target="_blank" class="medium"><i class="bx bxl-medium"></i></a>
</div>
</div>
<nav id="navbar" class="nav-menu navbar">
<ul>
<li><a href="index.html#hero" class="nav-link scrollto active"><i class="bx bx-home"></i> <span>Home</span></a></li>
<li><a href="index.html#about" class="nav-link scrollto"><i class="bx bx-user"></i> <span>About</span></a></li>
<li><a href="index.html#resume" class="nav-link scrollto"><i class="bx bx-file-blank"></i> <span>Resume</span></a></li>
<li><a href="index.html#portfolio" class="nav-link scrollto"><i class="bx bx-book-content"></i> <span>Portfolio</span></a></li>
<li><a href="index.html#skills-and-tools" class="nav-link scrollto"><i class="bx bx-wrench"></i> <span>Skills and Tools</span></a></li>
<li><a href="index.html#language" class="nav-link scrollto"><i class="bi bi-menu-up"></i> <span>Languages</span></a></li>
<li><a href="index.html#awards" class="nav-link scrollto"><i class="bi bi-award-fill"></i> <span>Awards</span></a></li>
<li><a href="index.html#professionalcourses" class="nav-link scrollto"><i class="bx bx-book-alt"></i> <span>Professional Certification</span></a></li>
<li><a href="index.html#publications" class="nav-link scrollto"><i class="bx bx-news"></i> <span>Publications</span></a></li>
<!-- <li><a href="index.html#extra-curricular" class="nav-link scrollto"><i class="bx bx-rocket"></i> <span>Extra-Curricular Activities</span></a></li> -->
<!-- <li><a href="#contact" class="nav-link scrollto"><i class="bx bx-envelope"></i> <span>Contact</span></a></li> -->
</ul>
</nav><!-- .nav-menu -->
</div>
</header><!-- End Header -->
<main id="main">
<!-- ======= Breadcrumbs ======= -->
<section id="breadcrumbs" class="breadcrumbs">
<div class="container">
<div class="d-flex justify-content-between align-items-center">
<h2>Machine Learning</h2>
<ol>
<li><a href="machine-learning.html" class="clickable-box">Content section</a></li>
<li><a href="index.html#portfolio" class="clickable-box">Portfolio section</a></li>
</ol>
</div>
</div>
</section><!-- End Breadcrumbs -->
<!------ right dropdown menue ------->
<div class="right-side-list">
<div class="dropdown">
<button class="dropbtn"><strong>Shortcuts:</strong></button>
<div class="dropdown-content">
<ul>
<li><a href="cloud-compute.html"><i class="fas fa-cloud"></i> Cloud</a></li>
<li><a href="AWS-GCP.html"><i class="fas fa-cloud"></i> AWS-GCP</a></li>
<li><a href="amazon-s3.html"><i class="fas fa-cloud"></i> AWS S3</a></li>
<li><a href="ec2-confi.html"><i class="fas fa-server"></i> EC2</a></li>
<li><a href="Docker-Container.html"><i class="fab fa-docker" style="color: rgb(29, 27, 27);"></i> Docker</a></li>
<li><a href="Jupyter-nifi.html"><i class="fab fa-python" style="color: rgb(34, 32, 32);"></i> Jupyter-nifi</a></li>
<li><a href="snowflake-task-stream.html"><i class="fas fa-snowflake"></i> Snowflake</a></li>
<li><a href="data-model.html"><i class="fas fa-database"></i> Data modeling</a></li>
<li><a href="sql-basics.html"><i class="fas fa-table"></i> QL</a></li>
<li><a href="sql-basic-details.html"><i class="fas fa-database"></i> SQL</a></li>
<li><a href="Bigquerry-sql.html"><i class="fas fa-database"></i> Bigquerry</a></li>
<li><a href="scd.html"><i class="fas fa-archive"></i> SCD</a></li>
<li><a href="sql-project.html"><i class="fas fa-database"></i> SQL project</a></li>
<!-- Add more subsections as needed -->
</ul>
</div>
</div>
</div>
<!-- ======= Portfolio Details Section ======= -->
<section id="portfolio-details" class="portfolio-details">
<div class="container">
<div class="row gy-4">
<h1>Decision Tree: Classification methods</h1>
<div class="col-lg-8">
<div class="portfolio-details-slider swiper">
<div class="swiper-wrapper align-items-center">
<figure>
<img src="assets/img/machine-ln/classification-decision-diagram.png" alt="" style="max-width: 90%; max-height: auto;">
<figcaption style="text-align: center;"></figcaption>
</figure>
</div>
</div>
</div>
<div class="col-lg-4 grey-box">
<div class="section-title">
<h3>Content</h3>
<ol>
<li><a href="#introduction">Introduction</a></li>
<ul>
<li><a href="#example-1">Example</a></li>
</ul>
<li><a href="#Principle">Working Principle</a></li>
<ul>
<li><a href="#splitting-criteria">Splitting criteria in Decision trees: Attribute Selection Measure (ASM)</a></li>
<li><a href="#asm">What is Attribute Selection Measure (ASM)?</a></li>
<li><a href="#why-asm">Why is ASM important?</a></li>
<li><a href="#common-asm">Common ASM Methods</a></li>
</ul>
<li><a href="#example-2">Example-1: User dataset</a></li>
<li><a href="#example-decision-tree-2">Example-2: Titanic dataset</a></li>
<li><a href="#reference">Reference</a></li>
</ol>
</div>
</div>
</div>
<section>
<!-------------------- Introduction ---------------------->
<h2 id="introdction">Introduction</h2>
<p>A Decision Tree is a supervised learning technique that can be used for both classification and regression problems, though it is most often used for classification. It is a tree-structured classifier consisting of a root node, branches, internal nodes, and leaf nodes: internal nodes represent the features of a dataset, branches represent the decision rules, and each leaf node represents an outcome.</p>
<p>The decision rules are generally in the form of if-then-else statements. The deeper the tree, the more complex the rules and the more closely the model fits the training data.</p>
<div class="grey-box">
<ul>
<li><strong>Root Node:</strong> This is the starting point of the tree. It represents the entire dataset and is where the first decision is made. The root node corresponds to the feature that best splits the data into distinct groups, maximizing the homogeneity of the target variable within each group.</li>
<li><strong>Internal Nodes/Decision Nodes:</strong> These nodes represent features or attributes that are used to partition the data further. Each internal node corresponds to a decision based on a specific feature, leading to one or more branches.</li>
<li><strong>Branches:</strong> Branches emanate from internal nodes and represent the possible outcomes of the decision based on the feature at that node. Each branch corresponds to a specific value or range of values of the feature being evaluated.</li>
<li><strong>Leaf Nodes:</strong> These are the terminal nodes of the tree, where no further splitting occurs. Each leaf node represents a class label (in classification) or a predicted value (in regression). The decision tree algorithm aims to make the predictions at the leaf nodes as accurate as possible based on the features and their values.</li>
</ul>
</div>
<!--------------------------------->
<!---------------Example----------->
<!--------------------------------->
<h4 id="example-1">Example-1</h4>
<p>Suppose there are different animals, and you want to identify each animal and classify them based on their features. We can easily accomplish this by using a decision tree.</p>
<figure>
<img src="assets/img/machine-ln/classification-example-1.png" alt="" style="max-width: 90%; max-height: auto;">
<figcaption style="text-align: center;"></figcaption>
</figure><br>
<p>The following is a cluttered sample data set with high entropy:</p>
<table>
<tr>
<th>Color</th>
<th>Height</th>
<th>Label</th>
</tr>
<tr>
<td>Grey</td>
<td>10</td>
<td>Elephant</td>
</tr>
<tr>
<td>Yellow</td>
<td>10</td>
<td>Giraffe</td>
</tr>
<tr>
<td>Brown</td>
<td>3</td>
<td>Monkey</td>
</tr>
<tr>
<td>Grey</td>
<td>10</td>
<td>Elephant</td>
</tr>
<tr>
<td>Yellow</td>
<td>4</td>
<td>Tiger</td>
</tr>
</table><br>
<p>We have to determine which feature splits the data so that the information gain is the highest. We can do that by splitting the data using each feature and checking the information gain we obtain. The feature that returns the highest gain will be used for the first split. Let's take the following conditions into consideration:</p>
<table>
<tr>
<th>Condition</th>
</tr>
<tr><td>Color == Yellow</td></tr>
<tr><td>Height >= 10</td></tr>
<tr><td>Color == Brown</td></tr>
<tr><td>Color == Grey</td></tr>
<tr><td>Height &lt; 10</td></tr>
</table><br>
<p>We’ll use the <strong>information gain</strong> method to determine which variable yields the maximum gain; that variable will be used at the root node.</p>
<ul>
<li>Suppose <code>Color == Yellow</code> results in the maximum information gain, so that is what we will use for our first split at the root node (a short numeric check of this gain follows the figure below).</li>
<li>The entropy after splitting should decrease considerably. However, we still need to split the child nodes in both branches to reach an entropy value of zero.</li>
<li>We will split both nodes using the <code>Height</code> variable, with <code>Height &gt;= 10</code> and <code>Height &lt; 10</code> as our conditions.</li>
<li>The decision tree below can now predict all the classes of animals present in the data set.</li>
</ul>
<figure>
<img src="assets/img/machine-ln/classification-example-2.png" alt="" style="max-width: 70%; max-height: auto;">
<figcaption style="text-align: center;"></figcaption>
</figure><br>
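<p>To make the split choice above concrete, here is a small, hedged sketch (my own helper functions on the five-row animal table; column names are assumptions) that computes the entropy of the labels and the information gain of the <code>Color == Yellow</code> split:</p>
<pre><code class="language-python">
import numpy as np
import pandas as pd

# Toy animal table reproduced from above
animals = pd.DataFrame({
    "Color":  ["Grey", "Yellow", "Brown", "Grey", "Yellow"],
    "Height": [10, 10, 3, 10, 4],
    "Label":  ["Elephant", "Giraffe", "Monkey", "Elephant", "Tiger"],
})

def entropy(labels):
    """Shannon entropy H(X) = -sum(p_i * log2(p_i)) of a label series."""
    p = labels.value_counts(normalize=True)
    return float(-(p * np.log2(p)).sum())

def information_gain(df, mask, target="Label"):
    """Parent entropy minus the weighted entropy of the two subsets defined by mask."""
    left, right = df[mask], df[~mask]
    weighted = (len(left) * entropy(left[target]) +
                len(right) * entropy(right[target])) / len(df)
    return entropy(df[target]) - weighted

print("Parent entropy:     ", round(entropy(animals["Label"]), 3))
print("IG(Color == Yellow):", round(information_gain(animals, animals["Color"] == "Yellow"), 3))
</code></pre>
<p>The same function can be applied to each of the other candidate conditions in the table above to rank the splits.</p>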
<!--------------------------------->
<!-- Principle -->
<!--------------------------------->
<h2 id="Principle">Working Principle</h2>
<p>In a decision tree, to predict the class of a given record, the algorithm starts from the root node of the tree. It compares the value of the root attribute with the corresponding attribute of the record and, based on the comparison, follows the branch and jumps to the next node. For the next node, the algorithm again compares the attribute value and moves further down. It continues this process until it reaches a leaf node of the tree.</p>
<p>The algorithm works by recursively partitioning the data into subsets based on the most significant attribute at each node using the <strong>Attribute Selection Measure (ASM)</strong>. This process continues until the subsets at a node have the same target variable or reach a specified maximum depth.</p>
<p>The working principle of a decision tree involves the following steps (a minimal code sketch follows the list):</p>
<ul>
<li><strong>Feature Selection:</strong> The algorithm evaluates different features in the dataset to determine the best feature that splits the data into distinct groups. It selects the feature that maximizes the homogeneity (or purity) of the target variable within each group.</li>
<li><strong>Splitting:</strong> After selecting the best feature, the algorithm splits the dataset into subsets based on the values of that feature. Each subset corresponds to a different branch of the tree.</li>
<li><strong>Recursive Partitioning:</strong> This process of feature selection and splitting continues recursively for each subset until a stopping criterion is met. Common stopping criteria include reaching a maximum tree depth, having a minimum number of samples in a node, or no further improvement in purity.</li>
<li><strong>Leaf Node Assignment:</strong> Once the recursive partitioning process reaches a stopping point, the algorithm assigns a class label (in classification) or a predicted value (in regression) to each leaf node based on the majority class or average target variable value of the samples in that node.</li>
<li><strong>Prediction:</strong> To make predictions for new data points, the algorithm traverses the decision tree from the root node down to a leaf node, following the decision rules at each internal node based on the features of the data point. The prediction at the leaf node reached by the traversal is then assigned to the data point.</li>
</ul>
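<p>The recursive procedure above can be written down in a few lines. The following is a minimal, illustrative sketch (my own code, handling only categorical features, and not the exact algorithm of any particular library): it greedily picks the feature with the highest information gain, splits on it, and stops when a node is pure, when no features remain, or when a maximum depth is reached.</p>
<pre><code class="language-python">
import numpy as np
import pandas as pd

def entropy(y):
    """H(X) = -sum(p_i * log2(p_i)) for a pandas Series of class labels."""
    p = y.value_counts(normalize=True)
    return float(-(p * np.log2(p)).sum())

def best_feature(df, target):
    """Return the categorical feature whose split yields the highest information gain."""
    parent = entropy(df[target])
    gains = {}
    for col in df.columns.drop(target):
        weighted = sum(len(sub) / len(df) * entropy(sub[target])
                       for _, sub in df.groupby(col))
        gains[col] = parent - weighted
    return max(gains, key=gains.get)

def build_tree(df, target, depth=0, max_depth=3):
    """Recursively build a tree as a nested dict {feature: {value: subtree-or-label}}."""
    # Stopping criteria: pure node, no features left, or maximum depth reached
    if df[target].nunique() == 1 or len(df.columns) == 1 or depth == max_depth:
        return df[target].mode()[0]   # leaf node: majority class
    feature = best_feature(df, target)
    return {feature: {value: build_tree(subset.drop(columns=feature), target,
                                        depth + 1, max_depth)
                      for value, subset in df.groupby(feature)}}

# Hypothetical usage with a categorical DataFrame `df` containing a "Play?" column:
# tree = build_tree(df[["Weather", "Humidity", "Wind", "Play?"]], target="Play?")
</code></pre>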
<!--------------------------------->
<!-- splitting area -->
<!--------------------------------->
<h4 id="splitting-criteria">Splitting criteria in Decision trees: Attribute Selection Measure (ASM)</h4>
<p>The splitting criteria in decision trees are used to determine how the data should be partitioned at each node of the tree. Attribute Selection Measure (ASM) is a term often used in the context of decision trees, specifically regarding the selection of the best attribute to split the data at each node. ASM refers to the criterion or metric used to evaluate and rank the attributes based on their effectiveness in partitioning the data and improving the homogeneity of the resulting subsets.</p>
<h5 id="asm">What is Attribute Selection Measure (ASM)?</h5>
<p>ASM is a criterion or metric used in decision tree algorithms to assess the importance of different attributes in making splitting decisions. It helps determine which attribute should be chosen as the splitting criterion at each node of the decision tree. The attribute with the highest ASM score is typically selected for splitting, as it leads to more informative and discriminative splits.</p>
<h5 id="why-asm">Why is ASM important?</h5>
<p>ASM plays a crucial role in the construction of decision trees by guiding the algorithm in selecting the most relevant attributes for partitioning the data. By choosing attributes with high ASM scores, decision trees can effectively divide the dataset into subsets that are more homogeneous with respect to the target variable. This, in turn, leads to the creation of accurate and interpretable decision tree models.</p>
<h5 id="common-asm">Common ASM Methods:</h5>
<p>Several methods exist for calculating ASM, each with its own strengths and considerations. Some common ASM methods include:</p>
<ol>
<li><strong>Information Gain (Entropy):</strong> Information gain measures the reduction in entropy (or uncertainty) achieved by splitting the data based on a particular feature. The goal is to select the feature that maximizes information gain, thereby improving the purity of the resulting subsets. Higher information gain implies better separation of classes or reduced randomness in the subsets.
<p><strong>Working principle:</strong> The decision tree algorithm evaluates each feature and calculates the entropy of the dataset before and after splitting based on that feature. Information gain is then computed as the difference between the initial entropy and the weighted sum of the entropies of the resulting subsets.</p>
<p><strong>Formulation:</strong> Entropy is a measure of randomness or uncertainty in a dataset. Mathematically, it is defined as:</p>
$$H(X) = - \sum_{i=1}^n p_i \log_2(p_i)$$
where:
<ul>
<li>\(H(X)\) is the entropy of the dataset \(X\).</li>
<li>\(p_i\) is the probability of class \(i\) in the dataset.</li>
<li>\(n\) is the total number of classes.</li>
</ul>
<p>Information gain is used to select the best feature for splitting the data in decision trees. It measures the reduction in entropy (or increase in purity) achieved by splitting the data based on a particular feature. Mathematically, information gain is calculated as:</p>
$$\text{Information Gain} = H(X) - \sum_{j=1}^m \frac{N_j}{N} H(X_j).$$
<p>which can also be written as: <strong>Information Gain= Entropy(S)- [(Weighted Avg) \(\times\) Entropy(each feature)]</strong></p>
where:
<ul>
<li>\(H(X)\) is the entropy of the original dataset \(X\).</li>
<li>\(m\) is the number of subsets after splitting based on the feature.</li>
<li>\(N_j\) is the number of samples in subset \(j\).</li>
<li>\(N\) is the total number of samples in the original dataset.</li>
<li>\(H(X_j)\) is the entropy of subset \(j\).</li>
</ul>
<p><strong>Interpretation:</strong> Higher information gain indicates that splitting based on a certain feature leads to more homogeneous subsets with respect to the target variable. In other words, it signifies that the feature provides more discriminatory power for classification.</p>
<p><strong>Application:</strong> Information gain is commonly used in decision tree algorithms such as ID3 (Iterative Dichotomiser 3) and C4.5 (successor to ID3) for building classification trees.</p>
<!------------------------------------>
<h2 id="example-2"><strong>Example</strong></h2>
<p>Let’s understand decision trees with the help of a dataset covering the last 10 days:</p>
<table>
<tr>
<th>Day</th>
<th>Weather</th>
<th>Temperature</th>
<th>Humidity</th>
<th>Wind</th>
<th>Play?</th>
</tr>
<tr>
<td>1</td>
<td>Sunny</td>
<td>Hot</td>
<td>High</td>
<td>Weak</td>
<td>No</td>
</tr>
<tr>
<td>2</td>
<td>Cloudy</td>
<td>Hot</td>
<td>High</td>
<td>Weak</td>
<td>Yes</td>
</tr>
<tr>
<td>3</td>
<td>Sunny</td>
<td>Mild</td>
<td>Normal</td>
<td>Strong</td>
<td>Yes</td>
</tr>
<tr>
<td>4</td>
<td>Cloudy</td>
<td>Mild</td>
<td>High</td>
<td>Strong</td>
<td>Yes</td>
</tr>
<tr>
<td>5</td>
<td>Rainy</td>
<td>Mild</td>
<td>High</td>
<td>Strong</td>
<td>No</td>
</tr>
<tr>
<td>6</td>
<td>Rainy</td>
<td>Cool</td>
<td>Normal</td>
<td>Strong</td>
<td>No</td>
</tr>
<tr>
<td>7</td>
<td>Rainy</td>
<td>Mild</td>
<td>High</td>
<td>Weak</td>
<td>Yes</td>
</tr>
<tr>
<td>8</td>
<td>Sunny</td>
<td>Hot</td>
<td>High</td>
<td>Strong</td>
<td>No</td>
</tr>
<tr>
<td>9</td>
<td>Cloudy</td>
<td>Hot</td>
<td>Normal</td>
<td>Weak</td>
<td>Yes</td>
</tr>
<tr>
<td>10</td>
<td>Rainy</td>
<td>Mild</td>
<td>High</td>
<td>Strong</td>
<td>No</td>
</tr>
</table>
<p>In layman's terms, decision trees are nothing but a bunch of if-else statements. The tree checks whether a condition is true and, if it is, moves on to the next node attached to that decision.</p>
<p>In the tree built from this data, the first question is about the weather: is it sunny, cloudy, or rainy? Depending on the answer, the tree moves on to the next feature, such as humidity or wind. It then checks whether the wind is strong or weak; if the wind is weak and the weather is rainy, the person may go out and play.</p>
<p>Suppose we have a dataset with the following target variable (class labels) and one attribute:</p>
<table>
<tr>
<th>Day</th>
<th>Attribute (feature)</th>
<th>Play?</th>
</tr>
<tr>
<td>1</td>
<td>Sunny</td>
<td>No</td>
</tr>
<tr>
<td>2</td>
<td>Cloudy</td>
<td>Yes</td>
</tr>
<tr>
<td>3</td>
<td>Sunny</td>
<td>Yes</td>
</tr>
<tr>
<td>4</td>
<td>Cloudy</td>
<td>Yes</td>
</tr>
<tr>
<td>5</td>
<td>Rainy</td>
<td>No</td>
</tr>
<tr>
<td>6</td>
<td>Rainy</td>
<td>No</td>
</tr>
<tr>
<td>7</td>
<td>Rainy</td>
<td>Yes</td>
</tr>
<tr>
<td>8</td>
<td>Sunny</td>
<td>No</td>
</tr>
<tr>
<td>9</td>
<td>Cloudy</td>
<td>Yes</td>
</tr>
<tr>
<td>10</td>
<td>Rainy</td>
<td>No</td>
</tr>
</table>
<p>We can group the dataset by the Weather attribute as follows:</p>
<table>
<tr>
<th>Attribute</th>
<th>Counts(Yes)</th>
<th>Counts(No)</th>
</tr>
<tr>
<td>Sunny</td>
<td>1</td>
<td>2</td>
</tr>
<tr>
<td>Cloudy</td>
<td>3</td>
<td>0</td>
</tr>
<tr>
<td>Rainy</td>
<td>1</td>
<td>3</td>
</tr>
</table>
<p>Similarly, we can create tables for the Temperature, Humidity, and Wind features.</p>
<p>Now, let's calculate the entropy of the target variable "Play?" and then compute the Information Gain for each attribute (feature).</p>
<ul>
<li><strong>Step-1: Calculate the entropy of the target variable "Play?":</strong>
<p>Total instances (sample): \(n =10\)</p>
<p>Number of "Yes" instances (positive): \(p_{\text{Yes}} = 5\)</p>
<p>Number of "No" instances (negative): \(p_{\text{No}} = 5\)</p>
<p><strong>Entropy (H(play?)):</strong></p>
$$H(Play?) = - \left(\frac{5}{10} \log_2\frac{5}{10}+\frac{5}{10} \log_2\frac{5}{10}\right) = -(-0.5 -0.5) =1$$
</li>
<li><strong>Step 2: Calculate Information Gain for each feature: </strong>
<p><strong>Sunny:</strong></p>
<ul>
<li>Total instances with "Sunny": \(n = 3\)</li>
<li>Number of "Yes" instances (positive) with "Sunny": \(p_{\text{Yes,Sunny}} = 1\)</li>
<li>Number of "No" instances (negative) with "Sunny": \(p_{\text{No,Sunny}} = 2\)</li>
<li>Entropy for "Sunny": \(H(\text{Sunny}) = -\left(\frac{1}{3} \log_2\frac{1}{3}+\frac{2}{3} \log_2\frac{2}{3}\right) = 0.918\)</li>
<li>Weighted contribution to the Weather entropy: \(\frac{3}{10}\times 0.918 = 0.2754\).</li>
<li>Similarly, we can calculate the entropy and the weighted contribution for "Cloudy" and "Rainy".</li>
</ul>
<p><strong>Cloudy:</strong></p>
<ul>
<li>Total instances with "Cloudy": \(n =3\)</li>
<li>Number of "Yes" instances (positive) with "Cloudy": \(p_{\text{Yes,Cloudy}}=3\)</li>
<li>Number of "No" instances (negative) with "Cloudy": \(p_{\text{No,Cloudy}}=0\)</li>
<li>Entropy for "Cloudy" i.e. \(H(Cloudy) = -\left(\frac{3}{3} \log_2\frac{3}{3}+\frac{0}{3} \log_2\frac{0}{3}\right) =0\)</li>
<li>Information Gain i.e. \(IG(\text{Play?, Cloudy}) = 1-\frac{3}{10}\times 0 =1\).</li>
</ul>
<p><strong>Rainy:</strong></p>
<ul>
<li>Total instances with "Rainy": \(n =4\)</li>
<li>Number of "Yes" instances (positive) with "Rainy": \(p_{\text{Yes,Rainy}}=1\)</li>
<li>Number of "No" instances (negative) with "Rainy": \(p_{\text{No,Rainy}}=3\)</li>
<li>Entropy for "Rainy" i.e. \(H(Rainy) = -\left(\frac{1}{4} \log_2\frac{1}{4}+\frac{3}{4} \log_2\frac{3}{4}\right) =0.8113\)</li>
<li>Weighted contribution to the Weather entropy: \(\frac{4}{10}\times 0.8113 = 0.3245\).</li>
</ul>
<p>Now we can calculate the weighted entropy for the Weather feature:</p>
$$H_{\text{Weather}} = \frac{3}{10} \times H_{\text{Sunny}} + \frac{3}{10} \times H_{\text{Cloudy}} + \frac{4}{10} \times H_{\text{Rainy}} = \frac{3}{10} \times 0.918 + \frac{3}{10} \times 0 + \frac{4}{10} \times 0.8113 =0.5999$$
<p>and the corresponding information gain is:</p>
$$IG_{\text{Weather}} = H- H_{\text{Weather}} = 1-0.5999=0.4001$$
</li>
</ul>
<p>Similarly, calculate information gain for other features: Temperature, Humidity, Wind. This process will help us determine which feature provides the most Information Gain, indicating its importance for splitting the dataset in the decision tree.</p>
<figure>
<img src="assets/img/machine-ln/classfication-decision-tree-example.png" alt="" style="max-width: 90%; max-height: auto;">
<figcaption style="text-align: center;"></figcaption>
</figure>
</li>
<!----------- end of IG and entropy -------------->
<li><strong>Gini Index (Impurity):</strong> Gini impurity measures the probability of misclassifying a randomly chosen sample if it were labeled according to the class distribution in the dataset. The goal is to select the feature that minimizes Gini impurity, thereby improving the homogeneity of the resulting subsets. Lower Gini impurity implies purer subsets with fewer mixed-class samples.
<p><strong>Working Principle:</strong> Similar to information gain, the decision tree algorithm evaluates each feature and calculates the Gini impurity of the dataset before and after splitting based on that feature. The feature with the lowest Gini impurity (or highest purity) is selected for splitting. Mathematically, Gini impurity is calculated as:</p>
$$G(X) = 1- \sum_{i=1}^n p_i^2$$
where:
<ul>
<li>\(G(X)\) is the Gini impurity of the dataset \(X\).</li>
<li>\(p_i\) is the probability of class \(i\) in the dataset.</li>
<li>\(n\) is the total number of classes.</li>
</ul>
<p><strong>Interpretation:</strong> Lower Gini impurity indicates that splitting based on a certain feature leads to more homogeneous subsets with respect to the target variable. It signifies that the feature effectively separates the classes in the dataset.</p>
<p><strong>Application:</strong> Gini impurity is commonly used in decision tree algorithms such as CART (Classification and Regression Trees) for building both classification and regression trees.</p>
</li>
<li><strong>Gain Ratio:</strong> Gain Ratio is a modification of Information Gain that aims to overcome its bias towards attributes with a large number of distinct values. It penalizes attributes with many distinct values, thereby helping to prevent overfitting.
<p><strong>Formula:</strong></p>
$$\text{Gain Ratio} = \frac{\text{Information Gain}}{\text{Split Information}}$$
<p><strong>Explanation:</strong> Gain Ratio adjusts Information Gain by considering the intrinsic information of each attribute. It divides the Information Gain by the Split Information to normalize the gain by the attribute's intrinsic information. This normalization helps avoid the bias towards attributes with many distinct values.</p>
</li>
<li><strong>Chi-Square Test:</strong> The Chi-Square Test evaluates the independence between attributes and the target variable by comparing the observed distribution of class labels in each subset to the expected distribution. It helps determine whether the splits based on a particular attribute are statistically significant.
<p><strong>Formula:</strong> The Chi-Square Test statistic is calculated as follows:</p>
$$\chi^2 = \sum_{i=1}^k \frac{(O_i - E_i)^2}{E_i}$$
where:
<ul>
<li>\(\chi^2\) is the Chi-Square test statistic.</li>
<li>\(O_i\) is the observed frequency of class \(i\) in the subset.</li>
<li>\(E_i\) is the expected frequency of class \(i\) based on the overall distribution.</li>
</ul>
<p><strong>Explanation:</strong> Chi-Square Test compares the observed frequencies of class labels in each subset to the expected frequencies based on the overall distribution. A higher Chi-Square Test statistic indicates a greater difference between the observed and expected frequencies, suggesting that the splits based on the attribute are more informative and significant.</p>
</li>
</ol>
<p>Each of these methods evaluates attributes based on different criteria, such as the reduction in entropy, impurity, or independence between attributes and the target variable. The choice of ASM method depends on the specific characteristics of the dataset and the problem being addressed.</p>
<h5 id="application-asm">Application of ASM</h5>
<p>ASM is widely used in decision tree algorithms such as ID3, C4.5, and CART for attribute selection. These algorithms leverage ASM to determine the optimal splitting criteria at each node, leading to the creation of decision trees that effectively capture the underlying patterns and relationships in the data.</p>
<p>In conclusion, Attribute Selection Measure (ASM) is a fundamental concept in decision tree algorithms, guiding the selection of attributes for splitting the data at each node. By evaluating and ranking attributes based on their effectiveness in partitioning the data, ASM helps construct decision trees that are accurate, interpretable, and well-suited for a variety of machine learning tasks.</p>
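<p>To tie the formulas above together, here is a small, hedged sketch (my own code, reproducing the ten-row weather example and assuming scipy is available) that computes the entropy of the target, the weighted entropy and information gain for the Weather attribute, the Gini impurity, and a chi-square statistic for the same split. The printed entropy and information gain should match the worked example above.</p>
<pre><code class="language-python">
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency

# Ten-row weather example reproduced from the tables above
weather = pd.DataFrame({
    "Weather": ["Sunny", "Cloudy", "Sunny", "Cloudy", "Rainy",
                "Rainy", "Rainy", "Sunny", "Cloudy", "Rainy"],
    "Play":    ["No", "Yes", "Yes", "Yes", "No",
                "No", "Yes", "No", "Yes", "No"],
})

def entropy(y):
    """H(X) = -sum(p_i * log2(p_i))."""
    p = y.value_counts(normalize=True)
    return float(-(p * np.log2(p)).sum())

def gini(y):
    """G(X) = 1 - sum(p_i^2)."""
    p = y.value_counts(normalize=True)
    return float(1 - (p ** 2).sum())

H_parent = entropy(weather["Play"])                          # expected: 1.0
H_weather = sum(len(sub) / len(weather) * entropy(sub["Play"])
                for _, sub in weather.groupby("Weather"))    # expected: ~0.5999
info_gain = H_parent - H_weather                             # expected: ~0.4001

chi2, p_value, dof, _ = chi2_contingency(pd.crosstab(weather["Weather"], weather["Play"]))

print(f"H(Play)           = {H_parent:.4f}")
print(f"H_Weather         = {H_weather:.4f}")
print(f"IG(Play, Weather) = {info_gain:.4f}")
print(f"Gini(Play)        = {gini(weather['Play']):.4f}")
print(f"Chi-square (Weather vs Play) = {chi2:.3f}, p = {p_value:.3f}")
</code></pre>
<p>In scikit-learn, the choice between these impurity measures is exposed through the <code>criterion</code> parameter of <code>DecisionTreeClassifier</code> ('entropy' or 'gini'), as used in the examples below.</p>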
<!---------------- Example Probelm --------------------------->
<h3 id="example-decision-tree">Example-1: User dataset</h3>
<p>In this example, we again consider the user dataset similar to what we used in the case of <a href="naive-byes.html">Naive Bayes</a> and the <a href="knn.html">KNN algorithm</a>. We first select the features and the target, then use <code>train_test_split</code> to create the train and test datasets. Next, we instantiate the classifier using <code>DecisionTreeClassifier</code> and build our model on the training dataset. In the end, we visualize how our model performs on the train and test datasets. The detailed notebook is available at my <a href="https://github.com/arunp77/Machine-Learning/tree/main/Supervised-learning" target="_blank">Github repo</a> and the code is available in the <a href="https://github.com/arunp77/Machine-Learning/blob/main/Supervised-learning/Project-2.4-Decision-tree.ipynb" target="_blank">Jupyter notebook at my repo</a>.</p>
<pre><code class="language-python">
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df_user = pd.read_csv('User_Data.csv')
# Importing the dataset
X = df_user.iloc[:, [2, 3]].values
y = df_user.iloc[:, 4].values
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
#Fitting Decision Tree classifier to the training set
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)
# Importing libraries
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Set up the figure with two subplots in one row and two columns
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
# Visulaizing the training set result
x_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
np.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
axes[0].contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
alpha=0.75, cmap=ListedColormap(['#87CEEB', '#90EE90']))
axes[0].set_xlim(X1.min(), X1.max())
axes[0].set_ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
axes[0].scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
c=ListedColormap(['#0000FF', '#2ca02c'])(i), label=j)
axes[0].set_title('K-NN Algorithm (Training set)')
axes[0].set_xlabel('Age')
axes[0].set_ylabel('Estimated Salary')
axes[0].legend()
# Visulaizing the test set result
x_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(np.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
np.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
axes[1].contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
alpha=0.75, cmap=ListedColormap(['#87CEEB', '#90EE90']))
axes[1].set_xlim(X1.min(), X1.max())
axes[1].set_ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
axes[1].scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
c=ListedColormap(['#0000FF', '#2ca02c'])(i), label=j)
axes[1].set_title('K-NN Algorithm (Test set)')
axes[1].set_xlabel('Age')
axes[1].set_ylabel('Estimated Salary')
axes[1].legend()
plt.tight_layout()
plt.show()
</code></pre>
<figure>
<img src="assets/img/machine-ln/classification-decision-tree-example.png" alt="" style="max-width: 90%; max-height: auto;">
<figcaption style="text-align: center;">As we can see in the above image that there are some green data points within the purple region and vice versa. So, these are the incorrect predictions which we can see through the confusion matrix.</figcaption>
</figure>
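<p>The misclassified points mentioned in the caption can be counted explicitly. A short follow-up sketch, assuming <code>classifier</code>, <code>X_test</code>, and <code>y_test</code> from the code above are still in scope:</p>
<pre><code class="language-python">
from sklearn.metrics import confusion_matrix, accuracy_score

y_pred = classifier.predict(X_test)
print(confusion_matrix(y_test, y_pred))                 # rows: actual class, columns: predicted class
print("Test accuracy:", accuracy_score(y_test, y_pred))
</code></pre>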
<!----------------------------------->
<h3 id="example-decision-tree-2">Example-2: Titanic dataset</h3>
<p>In this example, we will use the Titanic dataset, which is available on <a href="https://www.kaggle.com/c/titanic/data" target="_blank">Kaggle</a>. Let's work through this example step by step:</p>
<ul>
<li><strong>Step-1: Load the dataset and select the features and target variable:</strong>
<pre><code class="language-python">
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
df = pd.read_csv('titanic.csv')
</code></pre>
Since the 'Sex' column contains either 'male' or 'female', we map these values to numbers. Similarly, we drop the columns not needed in our analysis and remove the rows with NaN values:
</li>
<li><strong>Step-2: Clean and encode the data:</strong>
<pre><code class="language-python">
# changing the categorical values of Sex to 0 (male) or 1 (female)
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
# dropping the columns we don't need for this analysis: Cabin, Name, Ticket
df.drop(['Cabin', 'Name', 'Ticket'], axis=1, inplace=True)
# dropping rows with NaN values
df.dropna(inplace=True)
</code></pre>
In the next step, we create dummy columns for the categorical 'Embarked' feature. Dummy/indicator variables are used for converting categorical variables into numerical form. For this purpose, we use <code>get_dummies</code>, which creates a new column for each unique value in the categorical column, with binary values indicating the presence or absence of each value in the original column.
<pre><code class="language-python">df = pd.get_dummies(df, columns=['Embarked'])</code></pre>
Now the X and y variables can be found as:
<pre><code class="language-python">
X = df.drop('Survived', axis=1)
y = df['Survived']
</code></pre>
</li>
<li><strong>Train-test and model building</strong>
<pre><code class="language-python">
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
</code></pre>
Next we instantiate our classifier <code>DecisionTreeClassifier</code> from the submodule <code>sklearn.tree</code>. Here we also choose the maximum depth of the tree.
<pre><code class="language-python">
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(max_depth = 3, random_state=42)
model.fit(X_train, y_train)
</code></pre>
The scores on the train and test datasets can be found with <code>model.score(X_train, y_train)</code> and <code>model.score(X_test, y_test)</code>, which give 0.8370786516853933 and 0.7528089887640449 respectively. Comparing the training score with the test score lets us spot overfitting: indeed, the training score is noticeably higher than the test score. To address this we add another parameter, <code>min_samples_leaf</code>, which sets the minimum number of samples required at a leaf node; a split is only considered if it leaves at least this many samples in each branch. The parameters <code>max_depth</code> and <code>min_samples_leaf</code> are two essential parameters for avoiding overfitting.
<pre><code class="language-python">
from sklearn.tree import DecisionTreeClassifier
model_min_samples = DecisionTreeClassifier(max_depth = 3, min_samples_leaf = 25, random_state=42)
model_min_samples.fit(X_train, y_train)
</code></pre>
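<p>A quick, hedged way to compare the two models side by side (assuming both fitted models and the split from the code above are still in scope; the exact values depend on the data and the split):</p>
<pre><code class="language-python">
# Compare train/test scores of the two trees to see how the gap shrinks
for name, m in [("max_depth=3", model),
                ("max_depth=3, min_samples_leaf=25", model_min_samples)]:
    print(f"{name:32s} train: {m.score(X_train, y_train):.3f}  test: {m.score(X_test, y_test):.3f}")
</code></pre>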
<p>In this case, we find that the accuracies for the train and test datasets are reduced to <code>0.8258426966292135</code> and <code>0.7471910112359551</code> respectively.</p>
<p>Although the score on the training set has slightly decreased, we notice that the gap between the score on the training set and the score on the test set is reduced. We therefore decide to continue with this model which generalizes better on the data it does not know.</p>
<p></p>
</li>
<li><strong>The interpretation of the model:</strong>
<p>One of the strong points of decision trees is the simplicity of interpreting how they work. In the same way that we analyzed our regression tree, we can analyze our classification tree. With this study, we want to know which variables are the most important for the model and which thresholds it uses to make its predictions.</p>
<pre><code class="language-python">
from sklearn.tree import plot_tree
fig, ax = plt.subplots(figsize=(40, 20))
plot_tree(model_min_samples,
feature_names=[
'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked_C',
'Embarked_Q', 'Embarked_S'
],
class_names=['Died', 'Survived'],
filled=True,
rounded=True)
plt.show()
</code></pre>
<figure>
<img src="assets/img/machine-ln/classfication-decision-tree-titanic.png" alt="" style="max-width: 90%; max-height: auto;">
<figcaption style="text-align: center;"></figcaption>
</figure>
<ul>
<li>The root node, at the top of the tree, contains all the samples of the training set; there are 534 of them: samples=534.
<p>The first test performed corresponds to <code>Sex &lt;= 0.5</code>. As a reminder, Sex is a binary variable: it equals 0 when the individual is a man and 1 when it is a woman. We then look at this value for each sample: all samples for which Sex is 0 go to the left child node, the others to the right child node. More simply, all men go to the left child node and all women to the right child node.</p>
<p>The value parameter indicates the distribution of classes in y_train before any separation: initially there are 325 individuals who died and 209 who survived. The class parameter indicates the majority class; here it is class 0, "Died": class = Died.</p>
<p>Finally, for a classification problem, the impurity is measured with the Gini criterion. For two classes this criterion lies between 0 and 0.5: the higher it is, the more heterogeneous the node; the lower it is, the more homogeneous. A node is homogeneous when it contains samples of only one class, in which case the Gini criterion equals 0.</p>
<p>The objective of our model is to separate the classes as well as possible, so we want to end up with nodes that are as homogeneous as possible. The more homogeneous a leaf (the lower its Gini criterion), the better the model will perform.</p>
</li>
<li>We recall that the child nodes are the intermediate nodes of the tree. In the first left child node, we retrieve all the male individuals: there are 343 males in the training set (samples=343). Applying the condition described at the root node reduces the Gini score by about 0.169, from 0.476 to 0.307, so the condition has increased the homogeneity of the node. In this child node, more people died than survived (278 > 65), and the majority class is Died.</li>
<li>The separations continue until the leaves, the terminal nodes, are obtained. The predicted class for the samples belonging to a leaf is its majority class. The color code is as follows: orange when the majority class is class 0, blue when it is class 1; the intensity of the color depends on the value of the Gini criterion, and the lower it is, the more intense the color. For the leftmost leaf, the Gini criterion is 0.5: there are 50 samples, split into 25 samples of class 0 and 25 of class 1, so the node is perfectly heterogeneous. This means the model is wrong for 25 individuals, i.e. 50% of the samples in this leaf. (A plain-text dump of the same tree is shown just after this list.)</li>
</ul>
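<p>In addition to the graphical view, the same tree can be dumped as indented text. A minimal sketch, assuming <code>model_min_samples</code> and the feature names above are still in scope (<code>export_text</code> is part of <code>sklearn.tree</code>):</p>
<pre><code class="language-python">
from sklearn.tree import export_text

# Plain-text view of the fitted tree: one line per node, indented by depth
print(export_text(model_min_samples,
                  feature_names=['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
                                 'Embarked_C', 'Embarked_Q', 'Embarked_S']))
</code></pre>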
</li>
</ul>
<p>Finally, the variables that enabled the classification of the samples, ranked in order, are "Sex", "Pclass", "Age" and "Fare". The higher a variable is placed in the tree, the more important it played a role in the classification.</p>
<p>But how to quantify the role of each variable?</p>
<p>Instead of displaying the tree, or in addition to this analysis, it is possible to study the "feature importances" of the model. This attribute of the model, called <code>feature_importances_</code>, allows us to rank the variables according to the role they played in the model's prediction choices. The sum of these values is 1.</p>
<p>The importance of a variable is calculated from the decrease in node impurity weighted by the probability of reaching the node. And the probability of reaching the node is obtained by dividing the number of samples reaching the node by the total number of samples.</p>
<p>Finally, the higher the importance value, the more important the variable.</p>
<pre><code class="language-python">
X_importances = pd.DataFrame({
"Variables": X.columns,
"Importance": model_min_samples.feature_importances_
}).sort_values(by='Importance', ascending=False)
X_importances.nlargest(4, "Importance").plot.bar(x="Variables",
y="Importance",
figsize=(15, 5),
color="#4529de");
</code></pre>
<figure>
<img src="assets/img/machine-ln/classification-decision-tree-example-2-importance.png" alt="" style="max-width: 90%; max-height: auto;">
<figcaption style="text-align: center;"></figcaption>
</figure>
<p>From these elements, we are able to decide on the accuracy of the expression "women and children first" (within the context of the data we have at our disposal). The women are indeed among the people who survived most after the disaster. The Sex variable is the most important one in the model. Nevertheless, even if age is among the three most important variables in the tree, a more precise analysis of the tree does not allow us to validate this assertion.</p>
</section>
<!----------- Reference ----------->
<section id="reference">
<h2>References</h2>
<ul>
<li><a href="https://github.com/arunp77/Machine-Learning/tree/main/Supervised-learning" target="_blank">Github repo with codes.</a></li>
<li><a href="https://arunp77.github.io/logistic-regression.html#con-mat" target="_blank">Confusion matrix details</a>.</li>
<li>My github Repositories on Remote sensing <a href="https://github.com/arunp77/Machine-Learning/" target="_blank">Machine learning</a></li>
<li><a href="https://mlu-explain.github.io/linear-regression/" target="_blank">A Visual Introduction To Linear regression</a> (Best reference for theory and visualization).</li>
<li>Book on Regression model: <a href="https://avehtari.github.io/ROS-Examples/" target="_blank">Regression and Other Stories</a></li>
<li>Book on Statistics: <a href="https://hastie.su.domains/Papers/ESLII.pdf" target="_blank">The Elements of Statistical Learning</a></li>
<li><a href="https://www.javatpoint.com/machine-learning-naive-bayes-classifier" target="_blank">Naïve Bayes Classifier Algorithm, JAVAPoint.com</a></li>
<li><a href="https://www.colorado.edu/amath/sites/default/files/attached-files/ch12_0.pdf">https://www.colorado.edu/amath/sites/default/files/attached-files/ch12_0.pdf</a></li>
<li><a href="https://datahacker.rs/002-machine-learning-linear-regression-model/" target="_blank">One of the best description on Linear regression</a>.</li>
</ul>
</section>
<hr>
<div style="background-color: #f0f0f0; padding: 15px; border-radius: 5px;">
<h3>Some other interesting things to know:</h3>
<ul style="list-style-type: disc; margin-left: 30px;">
<li>Visit my website on <a href="sql-project.html">For Data, Big Data, Data-modeling, Datawarehouse, SQL, cloud-compute.</a></li>
<li>Visit my website on <a href="Data-engineering.html">Data engineering</a></li>
</ul>
</div>
<p></p>
<div class="navigation">
<a href="index.html#portfolio" class="clickable-box">
<span class="arrow-left">Portfolio section</span>
</a>
<a href="machine-learning.html" class="clickable-box">
<span class="arrow-right">Content</span>
</a>
</div>
</div>
</section><!-- End Portfolio Details Section -->
</main><!-- End #main -->
<!-- ======= Footer ======= -->
<footer id="footer">
<div class="container">
<div class="copyright">
© Copyright <strong><span>Arun</span></strong>
</div>
</div>
</footer><!-- End Footer -->
<a href="#" class="back-to-top d-flex align-items-center justify-content-center"><i class="bi bi-arrow-up-short"></i></a>
<!-- Vendor JS Files -->
<script src="assets/vendor/purecounter/purecounter_vanilla.js"></script>
<script src="assets/vendor/aos/aos.js"></script>
<script src="assets/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
<script src="assets/vendor/glightbox/js/glightbox.min.js"></script>
<script src="assets/vendor/isotope-layout/isotope.pkgd.min.js"></script>
<script src="assets/vendor/swiper/swiper-bundle.min.js"></script>
<script src="assets/vendor/typed.js/typed.umd.js"></script>
<script src="assets/vendor/waypoints/noframework.waypoints.js"></script>
<script src="assets/vendor/php-email-form/validate.js"></script>
<!-- Template Main JS File -->
<script src="assets/js/main.js"></script>
<script>
document.addEventListener("DOMContentLoaded", function () {
// Guard: highlight.js is not loaded on this page (Prism handles the code blocks)
if (window.hljs && hljs.initHighlightingOnLoad) { hljs.initHighlightingOnLoad(); }
});
</script>
</body>
</html>