Skip to content

Commit de20f1d

Browse files
committed
Improve channel and EPG matching with Sørensen-Dice similarity algorithm
- Add StringComparer utility to calculate string similarity
- Enhance channel and EPG matching to use fuzzy string comparison
- Update channel and EPG finding methods to support more flexible matching
- Modify EPG entry description to include channel ID
1 parent bb0b332 commit de20f1d

File tree

6 files changed

+211
-39
lines changed

6 files changed

+211
-39
lines changed

CMakeLists.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ set(IPTV_SOURCES src/addon.cpp
4545
src/iptvsimple/utilities/Logger.cpp
4646
src/iptvsimple/utilities/SettingsMigration.cpp
4747
src/iptvsimple/utilities/StreamUtils.cpp
48-
src/iptvsimple/utilities/WebUtils.cpp)
48+
src/iptvsimple/utilities/WebUtils.cpp
49+
src/iptvsimple/utilities/StringComparer.cpp)
4950

5051
set(IPTV_HEADERS src/addon.h
5152
src/IptvSimple.h
@@ -76,7 +77,8 @@ set(IPTV_HEADERS src/addon.h
7677
src/iptvsimple/utilities/StreamUtils.h
7778
src/iptvsimple/utilities/TimeUtils.h
7879
src/iptvsimple/utilities/WebUtils.h
79-
src/iptvsimple/utilities/XMLUtils.h)
80+
src/iptvsimple/utilities/XMLUtils.h
81+
src/iptvsimple/utilities/StringComparer.h)
8082

8183
addon_version(pvr.iptvsimple IPTV)
8284
add_definitions(-DIPTV_VERSION=${IPTV_VERSION})

src/iptvsimple/Channels.cpp

+48-8
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "ChannelGroups.h"
1111
#include "utilities/FileUtils.h"
1212
#include "utilities/Logger.h"
13+
#include "utilities/StringComparer.h"
1314

1415
#include <regex>
1516

@@ -136,30 +137,69 @@ Channel* Channels::GetChannel(int uniqueId)
136137

137138
/**
 * @brief Find the channel that best matches an EPG channel id or display name
 *
 * Matching is fuzzy: candidates are scored with the Sørensen-Dice coefficient and the
 * highest scoring channel wins. Three passes are made and the first pass that yields
 * any candidate decides the result:
 *   1. tvg-id against id (case handling follows the IgnoreCaseForEpgChannelIds setting)
 *   2. tvg-name against displayName with spaces replaced by underscores
 *   3. tvg-name or channel name against displayName, whichever scores higher
 *
 * @param id EPG channel id to look for
 * @param displayName EPG display name to fall back on, may be empty
 * @return The best matching channel, or nullptr if no channel scores above zero
 */
const Channel* Channels::FindChannel(const std::string& id, const std::string& displayName) const
{
  // Names are always compared case-insensitively; the ignore-case setting only
  // governs channel ids.
  const bool ignoreCaseForNames = true;

  double maxSimilarity = 0.0;
  const Channel* bestMatch = nullptr;

  // Remembers the candidate if it beats the best score so far. Returns true once a
  // perfect score has been seen so the caller can stop searching.
  const auto keepIfBetter = [&maxSimilarity, &bestMatch](const Channel& candidate, double similarity)
  {
    if (similarity > maxSimilarity)
    {
      maxSimilarity = similarity;
      bestMatch = &candidate;
    }
    return maxSimilarity == 1.0;
  };

  // NOTE(review): any score above zero is accepted, so ids or names that merely share
  // a single bigram are treated as a match - consider a minimum threshold.

  // Pass 1: tvg-id
  for (const auto& myChannel : m_channels)
  {
    if (keepIfBetter(myChannel, utilities::StringComparer::SorensenDiceSimilarity(
                                    myChannel.GetTvgId(), id,
                                    m_settings->IgnoreCaseForEpgChannelIds())))
      break;
  }

  if (bestMatch != nullptr)
    return bestMatch;

  if (displayName.empty())
    return nullptr;

  // Pass 2: tvg-name against the underscore form of the display name
  const std::string convertedDisplayName = std::regex_replace(displayName, std::regex(" "), "_");
  for (const auto& myChannel : m_channels)
  {
    if (keepIfBetter(myChannel, utilities::StringComparer::SorensenDiceSimilarity(
                                    myChannel.GetTvgName(), convertedDisplayName,
                                    ignoreCaseForNames)))
      break;
  }

  if (bestMatch != nullptr)
    return bestMatch;

  // Pass 3: tvg-name and channel name against the plain display name. Both are scored
  // so that a weak tvg-name score cannot hide an exact channel name match.
  for (const auto& myChannel : m_channels)
  {
    if (keepIfBetter(myChannel, utilities::StringComparer::SorensenDiceSimilarity(
                                    myChannel.GetTvgName(), displayName, ignoreCaseForNames)) ||
        keepIfBetter(myChannel, utilities::StringComparer::SorensenDiceSimilarity(
                                    myChannel.GetChannelName(), displayName, ignoreCaseForNames)))
      break;
  }

  return bestMatch;
}
164204

165205
int Channels::GenerateChannelId(const char* channelName, const char* streamUrl)

src/iptvsimple/Epg.cpp

+62-28
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@
99

1010
#include "utilities/FileUtils.h"
1111
#include "utilities/Logger.h"
12+
#include "utilities/StringComparer.h"
1213
#include "utilities/XMLUtils.h"
1314

15+
#include <algorithm>
1416
#include <chrono>
1517
#include <regex>
18+
#include <set>
1619
#include <thread>
1720

1821
#include <kodi/tools/StringUtils.h>
@@ -433,71 +436,102 @@ PVR_ERROR Epg::GetEPGForChannel(int channelUid, time_t epgWindowStart, time_t ep
433436
return PVR_ERROR_NO_ERROR;
434437
}
435438

436-
namespace
437-
{
438-
bool TvgIdMatchesCaseOrNoCase(const std::string& idOne, const std::string& idTwo, bool ignoreCaseForEpgChannelIds)
439-
{
440-
if (ignoreCaseForEpgChannelIds)
441-
return StringUtils::EqualsNoCase(idOne, idTwo);
442-
else
443-
return idOne == idTwo;
444-
}
445-
}
446-
447439
/**
 * @brief Look up the EPG channel entry whose id equals the given id
 *
 * The comparison is case-insensitive when the IgnoreCaseForEpgChannelIds setting is
 * enabled and an exact string comparison otherwise.
 *
 * @param id EPG channel id to look for
 * @return The first entry with a matching id, or nullptr if there is none
 */
ChannelEpg* Epg::FindEpgForChannel(const std::string& id) const
{
  for (auto& candidate : m_channelEpgs)
  {
    const bool idMatches = m_settings->IgnoreCaseForEpgChannelIds()
                               ? StringUtils::EqualsNoCase(candidate.GetId(), id)
                               : candidate.GetId() == id;

    if (idMatches)
      return const_cast<ChannelEpg*>(&candidate);
  }

  return nullptr;
}
457456

458457
/**
 * @brief Find the EPG channel entry that best matches the given channel
 *
 * Matching is fuzzy: candidates are scored with the Sørensen-Dice coefficient and the
 * highest scoring entry wins. EPG ids are compared with the channel's tvg-id first
 * (case handling follows the IgnoreCaseForEpgChannelIds setting). Only if no id scores
 * above zero are the EPG display names compared with the channel's tvg-name and
 * channel name.
 *
 * @param channel The channel to find EPG data for
 * @return The best matching entry, or nullptr if no entry scores above zero
 */
ChannelEpg* Epg::FindEpgForChannel(const Channel& channel) const
{
  // Names are always compared case-insensitively; the ignore-case setting only
  // governs channel ids.
  const bool ignoreCaseForNames = true;

  double maxSimilarity = 0.0;
  ChannelEpg* bestMatch = nullptr;

  // Remembers the candidate if it beats the best score so far. Returns true once a
  // perfect score has been seen so the caller can stop searching.
  const auto keepIfBetter = [&maxSimilarity, &bestMatch](const ChannelEpg& candidate, double similarity)
  {
    if (similarity > maxSimilarity)
    {
      maxSimilarity = similarity;
      // This method is const but its signature hands out a non-const pointer
      bestMatch = const_cast<ChannelEpg*>(&candidate);
    }
    return maxSimilarity == 1.0;
  };

  // NOTE(review): any score above zero is accepted, so ids or names that merely share
  // a single bigram are treated as a match - consider a minimum threshold.

  // Pass 1: EPG id against tvg-id
  for (const auto& myChannelEpg : m_channelEpgs)
  {
    if (keepIfBetter(myChannelEpg, utilities::StringComparer::SorensenDiceSimilarity(
                                       myChannelEpg.GetId(), channel.GetTvgId(),
                                       m_settings->IgnoreCaseForEpgChannelIds())))
      break;
  }

  if (bestMatch != nullptr)
    return bestMatch;

  // Pass 2: every display name against tvg-name and channel name. All three variants
  // are scored so that a weak score on one cannot hide an exact match on another.
  for (const auto& myChannelEpg : m_channelEpgs)
  {
    for (const DisplayNamePair& displayNamePair : myChannelEpg.GetDisplayNames())
    {
      if (keepIfBetter(myChannelEpg, utilities::StringComparer::SorensenDiceSimilarity(
                                         displayNamePair.m_displayName, channel.GetTvgName(),
                                         ignoreCaseForNames)) ||
          keepIfBetter(myChannelEpg, utilities::StringComparer::SorensenDiceSimilarity(
                                         displayNamePair.m_displayNameWithUnderscores,
                                         channel.GetTvgName(), ignoreCaseForNames)) ||
          keepIfBetter(myChannelEpg, utilities::StringComparer::SorensenDiceSimilarity(
                                         displayNamePair.m_displayName, channel.GetChannelName(),
                                         ignoreCaseForNames)))
        return bestMatch; // perfect match, nothing can score higher
    }
  }

  return bestMatch;
}
487513

488514
ChannelEpg* Epg::FindEpgForMediaEntry(const MediaEntry& mediaEntry) const
489515
{
490516
for (auto& myChannelEpg : m_channelEpgs)
491517
{
492-
if (TvgIdMatchesCaseOrNoCase(myChannelEpg.GetId(), mediaEntry.GetTvgId(), m_settings->IgnoreCaseForEpgChannelIds()))
518+
if (m_settings->IgnoreCaseForEpgChannelIds())
519+
{
520+
if (StringUtils::EqualsNoCase(myChannelEpg.GetId(), mediaEntry.GetTvgId()))
521+
return const_cast<ChannelEpg*>(&myChannelEpg);
522+
}
523+
else if (myChannelEpg.GetId() == mediaEntry.GetTvgId())
524+
{
493525
return const_cast<ChannelEpg*>(&myChannelEpg);
526+
}
494527
}
495528

496529
for (auto& myChannelEpg : m_channelEpgs)
497530
{
498531
for (const DisplayNamePair& displayNamePair : myChannelEpg.GetDisplayNames())
499532
{
500-
if (StringUtils::EqualsNoCase(displayNamePair.m_displayNameWithUnderscores, mediaEntry.GetTvgName()) ||
533+
if (StringUtils::EqualsNoCase(displayNamePair.m_displayNameWithUnderscores,
534+
mediaEntry.GetTvgName()) ||
501535
StringUtils::EqualsNoCase(displayNamePair.m_displayName, mediaEntry.GetTvgName()))
502536
return const_cast<ChannelEpg*>(&myChannelEpg);
503537
}
@@ -511,7 +545,7 @@ ChannelEpg* Epg::FindEpgForMediaEntry(const MediaEntry& mediaEntry) const
511545
if (StringUtils::EqualsNoCase(displayNamePair.m_displayName, mediaEntry.GetM3UName()))
512546
return const_cast<ChannelEpg*>(&myChannelEpg);
513547
}
514-
}
548+
}
515549

516550
return nullptr;
517551
}
@@ -522,7 +556,7 @@ void Epg::ApplyChannelsLogosFromEPG()
522556

523557
for (const auto& channel : m_channels.GetChannelsList())
524558
{
525-
const ChannelEpg* channelEpg = FindEpgForChannel(channel);
559+
const ChannelEpg* channelEpg = FindEpgForChannel(channel.GetTvgName());
526560
if (!channelEpg || channelEpg->GetIconPath().empty())
527561
continue;
528562

src/iptvsimple/data/EpgEntry.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ bool EpgEntry::UpdateFrom(const xml_node& programmeNode, const std::string& id,
214214
return false;
215215

216216
m_broadcastId = static_cast<int>(programmeStart);
217+
m_plot = id + "\n" + GetNodeValue(programmeNode, "desc");
217218
m_channelId = std::atoi(id.c_str());
218219
m_genreType = 0;
219220
m_genreSubType = 0;
@@ -227,7 +228,6 @@ bool EpgEntry::UpdateFrom(const xml_node& programmeNode, const std::string& id,
227228
m_seasonNumber = EPG_TAG_INVALID_SERIES_EPISODE;
228229

229230
m_title = GetNodeValue(programmeNode, "title");
230-
m_plot = GetNodeValue(programmeNode, "desc");
231231
m_episodeName = GetNodeValue(programmeNode, "sub-title");
232232

233233
m_genreString = GetJoinedNodeValues(programmeNode, "category");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Copyright (C) 2005-2021 Team Kodi (https://kodi.tv)
3+
*
4+
* SPDX-License-Identifier: GPL-2.0-or-later
5+
* See LICENSE.md for more information.
6+
*/
7+
8+
#include "StringComparer.h"
9+
10+
using namespace iptvsimple;
11+
using namespace iptvsimple::utilities;
12+
13+
/**
 * Scores two strings with the Sørensen-Dice coefficient computed over their sets of
 * distinct two-character substrings (bigrams): 2 * |A ∩ B| / (|A| + |B|).
 *
 * Returns 0.0 if either string is empty or equal to the placeholder "-1", and 1.0 if
 * the strings are equal (after lower-casing when ignoreCaseForEpgChannelIds is set).
 * Strings too short to contain a bigram score 0.0 unless they are equal.
 */
double StringComparer::SorensenDiceSimilarity(const std::string& text1,
                                              const std::string& text2,
                                              bool ignoreCaseForEpgChannelIds)
{
  // Empty values and the "-1" placeholder never match anything
  if (text1.empty() || text2.empty() || text1 == "-1" || text2 == "-1")
    return 0.0;

  // Local copies so the case conversion does not touch the caller's strings
  std::string str1 = text1;
  std::string str2 = text2;

  if (ignoreCaseForEpgChannelIds)
  {
    // tolower() has undefined behaviour for negative arguments, which is what every
    // non-ASCII byte becomes when char is signed, so convert via unsigned char.
    const auto toLower = [](unsigned char c) { return static_cast<char>(::tolower(c)); };
    std::transform(str1.begin(), str1.end(), str1.begin(), toLower);
    std::transform(str2.begin(), str2.end(), str2.begin(), toLower);
  }

  // Identical strings are a perfect match, whatever their length
  if (str1 == str2)
    return 1.0;

  // Collects the distinct bigrams of a string; yields an empty set for a single character
  const auto collectBigrams = [](const std::string& str) -> std::set<std::string>
  {
    std::set<std::string> bigrams;
    for (size_t i = 0; i + 1 < str.length(); ++i)
      bigrams.insert(str.substr(i, 2));
    return bigrams;
  };

  const std::set<std::string> bigrams1 = collectBigrams(str1);
  const std::set<std::string> bigrams2 = collectBigrams(str2);

  // Without bigrams on both sides there is nothing to compare
  if (bigrams1.empty() || bigrams2.empty())
    return 0.0;

  // Count the bigrams present in both sets
  size_t intersection = 0;
  for (const auto& bigram : bigrams1)
    intersection += bigrams2.count(bigram);

  return (2.0 * intersection) / (bigrams1.size() + bigrams2.size());
}
+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright (C) 2005-2021 Team Kodi (https://kodi.tv)
3+
*
4+
* SPDX-License-Identifier: GPL-2.0-or-later
5+
* See LICENSE.md for more information.
6+
*/
7+
8+
#pragma once
9+
10+
#include <algorithm>
11+
#include <set>
12+
#include <string>
13+
14+
15+
namespace iptvsimple
{
namespace utilities
{

/**
 * @brief Helper for fuzzy string comparison
 */
class StringComparer
{
public:
  /**
   * @brief Scores how alike two strings are using the Sørensen-Dice coefficient
   *
   * The score is computed over the distinct two-character substrings of each string.
   *
   * @param text1 First string to compare
   * @param text2 Second string to compare
   * @param ignoreCaseForEpgChannelIds If true, both strings are lower-cased before
   *                                   they are compared
   * @return A value between 0 and 1. 1 means the strings are identical; 0 is returned
   *         when they share nothing, or when either string is empty or equal to "-1"
   */
  static double SorensenDiceSimilarity(const std::string& text1,
                                       const std::string& text2,
                                       bool ignoreCaseForEpgChannelIds);
};

} // namespace utilities
} // namespace iptvsimple

0 commit comments

Comments
 (0)