Skip to content

Commit 108c6a7

Browse files
committed
Improve channel and EPG matching with Sørensen-Dice similarity algorithm
- Add StringComparer utility to calculate string similarity
- Enhance channel and EPG matching to use fuzzy string comparison
- Update channel and EPG finding methods to support more flexible matching
- Modify EPG entry description to include channel ID
- Improve EPG loading efficiency by filtering out empty entries
1 parent bb0b332 commit 108c6a7

File tree

12 files changed

+272
-49
lines changed

12 files changed

+272
-49
lines changed

CMakeLists.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ set(IPTV_SOURCES src/addon.cpp
4545
src/iptvsimple/utilities/Logger.cpp
4646
src/iptvsimple/utilities/SettingsMigration.cpp
4747
src/iptvsimple/utilities/StreamUtils.cpp
48-
src/iptvsimple/utilities/WebUtils.cpp)
48+
src/iptvsimple/utilities/WebUtils.cpp
49+
src/iptvsimple/utilities/StringComparer.cpp)
4950

5051
set(IPTV_HEADERS src/addon.h
5152
src/IptvSimple.h
@@ -76,7 +77,8 @@ set(IPTV_HEADERS src/addon.h
7677
src/iptvsimple/utilities/StreamUtils.h
7778
src/iptvsimple/utilities/TimeUtils.h
7879
src/iptvsimple/utilities/WebUtils.h
79-
src/iptvsimple/utilities/XMLUtils.h)
80+
src/iptvsimple/utilities/XMLUtils.h
81+
src/iptvsimple/utilities/StringComparer.h)
8082

8183
addon_version(pvr.iptvsimple IPTV)
8284
add_definitions(-DIPTV_VERSION=${IPTV_VERSION})

pvr.iptvsimple/resources/instance-settings.xml

+10
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,16 @@
477477
<default>false</default>
478478
<control type="toggle" />
479479
</setting>
480+
<setting id="epgChannelNameMatchThreshold" type="number" label="30081" help="30630">
481+
<level>2</level>
482+
<default>90</default>
483+
<constraints>
484+
<minimum>1</minimum>
485+
<maximum>100</maximum>
486+
<step>5</step>
487+
</constraints>
488+
<control type="slider" format="percentage"/>
489+
</setting>
480490
<setting id="epgIgnoreCaseForChannelIds" type="boolean" label="30077" help="30627">
481491
<level>2</level>
482492
<default>true</default>

pvr.iptvsimple/resources/language/resource.language.en_gb/strings.po

+13-3
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,12 @@ msgctxt "#30080"
449449
msgid "Interval for check"
450450
msgstr ""
451451

452-
#empty strings from id 30081 to 30099
452+
#. label: EPG Settings - epgChannelNameMatchThreshold
453+
msgctxt "#30081"
454+
msgid "EPG Channel Name Match Threshold"
455+
msgstr ""
456+
457+
#empty strings from id 30082 to 30099
453458

454459
#. label-category: catchup
455460
#. label-group: Catchup - Catchup
@@ -853,7 +858,12 @@ msgctxt "#30629"
853858
msgid "When checking for a valid M3U file, the length of time to wait between attempts. Note that a valid file will only be checked for on startup and once a valid file is found all checks stop."
854859
msgstr ""
855860

856-
#empty strings from id 30630 to 30639
861+
#. help: EPG Settings - epgChannelNameMatchThreshold
862+
msgctxt "#30630"
863+
msgid "Minimum similarity percentage required when matching EPG channel names. Lower values will match more channels but may cause incorrect matches."
864+
msgstr ""
865+
866+
#empty strings from id 30631 to 30639
857867

858868
#. help info - Channel Logos
859869

@@ -1129,4 +1139,4 @@ msgstr ""
11291139
#. label: Media - mediaForcePlaylist
11301140
msgctxt "#30806"
11311141
msgid "Force the full playlist to be media, regardless of what tags are present. Since the introduction of multiple instances for PVR add-ons this option can be useful."
1132-
msgstr ""
1142+
msgstr ""

pvr.iptvsimple/resources/settings.xml

+4
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@
206206
<level>4</level> <!-- hidden -->
207207
<default>false</default>
208208
</setting>
209+
<setting id="epgChannelNameMatchThreshold" type="number">
210+
<level>4</level> <!-- hidden -->
211+
<default>90</default>
212+
</setting>
209213
<setting id="epgIgnoreCaseForChannelIds" type="boolean">
210214
<level>4</level> <!-- hidden -->
211215
<default>true</default>

src/iptvsimple/Channels.cpp

+48-8
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "ChannelGroups.h"
1111
#include "utilities/FileUtils.h"
1212
#include "utilities/Logger.h"
13+
#include "utilities/StringComparer.h"
1314

1415
#include <regex>
1516

@@ -136,30 +137,69 @@ Channel* Channels::GetChannel(int uniqueId)
136137

137138
const Channel* Channels::FindChannel(const std::string& id, const std::string& displayName) const
138139
{
140+
double maxSimilarity = 0.0;
141+
Channel* bestMatch = nullptr;
142+
139143
for (const auto& myChannel : m_channels)
140144
{
141-
if (StringUtils::EqualsNoCase(myChannel.GetTvgId(), id))
142-
return &myChannel;
145+
double similarity = utilities::StringComparer::SorensenDiceSimilarity(
146+
myChannel.GetTvgId(), id, m_settings);
147+
148+
if (similarity > maxSimilarity)
149+
{
150+
maxSimilarity = similarity;
151+
bestMatch = const_cast<Channel*>(&myChannel);
152+
153+
if (maxSimilarity == 1.0)
154+
break;
143155
}
156+
}
157+
158+
if (bestMatch != nullptr)
159+
return bestMatch;
144160

145161
if (displayName.empty())
146162
return nullptr;
147163

148164
const std::string convertedDisplayName = std::regex_replace(displayName, std::regex(" "), "_");
149165
for (const auto& myChannel : m_channels)
150166
{
151-
if (StringUtils::EqualsNoCase(myChannel.GetTvgName(), convertedDisplayName) ||
152-
StringUtils::EqualsNoCase(myChannel.GetTvgName(), displayName))
153-
return &myChannel;
167+
double similarity = utilities::StringComparer::SorensenDiceSimilarity(
168+
myChannel.GetTvgName(), convertedDisplayName, m_settings);
169+
170+
if (similarity > maxSimilarity)
171+
{
172+
maxSimilarity = similarity;
173+
bestMatch = const_cast<Channel*>(&myChannel);
174+
175+
if (maxSimilarity == 1.0)
176+
break;
177+
}
154178
}
155179

180+
if (bestMatch != nullptr)
181+
return bestMatch;
182+
156183
for (const auto& myChannel : m_channels)
157184
{
158-
if (StringUtils::EqualsNoCase(myChannel.GetChannelName(), displayName))
159-
return &myChannel;
185+
double similarity = utilities::StringComparer::SorensenDiceSimilarity(
186+
myChannel.GetTvgName(), displayName, m_settings);
187+
188+
if (similarity == 0.0)
189+
similarity = utilities::StringComparer::SorensenDiceSimilarity(
190+
myChannel.GetChannelName(), displayName, m_settings);
191+
192+
if (similarity > maxSimilarity)
193+
{
194+
maxSimilarity = similarity;
195+
bestMatch = const_cast<Channel*>(&myChannel);
196+
197+
if (maxSimilarity == 1.0)
198+
break;
199+
}
160200
}
161201

162-
return nullptr;
202+
return bestMatch;
163203
}
164204

165205
int Channels::GenerateChannelId(const char* channelName, const char* streamUrl)

src/iptvsimple/Epg.cpp

+74-34
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@
99

1010
#include "utilities/FileUtils.h"
1111
#include "utilities/Logger.h"
12+
#include "utilities/StringComparer.h"
1213
#include "utilities/XMLUtils.h"
1314

15+
#include <algorithm>
1416
#include <chrono>
1517
#include <regex>
18+
#include <set>
1619
#include <thread>
1720

1821
#include <kodi/tools/StringUtils.h>
@@ -361,9 +364,19 @@ void Epg::LoadEpgEntries(const xml_node& rootElement, int epgWindowStart, int ep
361364
}
362365

363366
Logger::Log(LEVEL_INFO, "%s - Loaded '%d' EPG entries.", __FUNCTION__, count);
367+
368+
// Remove channel EPGs that have no EPG entries
369+
const int epgChannelCount = m_channelEpgs.size();
370+
m_channelEpgs.erase(
371+
std::remove_if(m_channelEpgs.begin(), m_channelEpgs.end(),
372+
[](ChannelEpg& channelEpg) {
373+
return channelEpg.GetEpgEntries().empty();
374+
}),
375+
m_channelEpgs.end()
376+
);
377+
Logger::Log(LEVEL_INFO, "%s - Number of channels with EPG data after cleanup: %d (Removed %d channelEPGs)", __FUNCTION__, m_channelEpgs.size(), epgChannelCount - m_channelEpgs.size());
364378
}
365379

366-
367380
void Epg::ReloadEPG()
368381
{
369382
m_xmltvLocation = m_settings->GetEpgLocation();
@@ -387,11 +400,6 @@ void Epg::ReloadEPG()
387400

388401
PVR_ERROR Epg::GetEPGForChannel(int channelUid, time_t epgWindowStart, time_t epgWindowEnd, kodi::addon::PVREPGTagsResultSet& results)
389402
{
390-
for (const auto& myChannel : m_channels.GetChannelsList())
391-
{
392-
if (myChannel.GetUniqueId() != channelUid)
393-
continue;
394-
395403
if (epgWindowStart > m_lastStart || epgWindowEnd > m_lastEnd)
396404
{
397405
// reload EPG for new time interval only
@@ -404,6 +412,10 @@ PVR_ERROR Epg::GetEPGForChannel(int channelUid, time_t epgWindowStart, time_t ep
404412
m_lastEnd = static_cast<int>(epgWindowEnd);
405413
}
406414
}
415+
for (const auto& myChannel : m_channels.GetChannelsList())
416+
{
417+
if (myChannel.GetUniqueId() != channelUid)
418+
continue;
407419

408420
ChannelEpg* channelEpg = FindEpgForChannel(myChannel);
409421
if (!channelEpg || channelEpg->GetEpgEntries().size() == 0)
@@ -433,71 +445,99 @@ PVR_ERROR Epg::GetEPGForChannel(int channelUid, time_t epgWindowStart, time_t ep
433445
return PVR_ERROR_NO_ERROR;
434446
}
435447

436-
namespace
437-
{
438-
bool TvgIdMatchesCaseOrNoCase(const std::string& idOne, const std::string& idTwo, bool ignoreCaseForEpgChannelIds)
439-
{
440-
if (ignoreCaseForEpgChannelIds)
441-
return StringUtils::EqualsNoCase(idOne, idTwo);
442-
else
443-
return idOne == idTwo;
444-
}
445-
}
446-
447448
ChannelEpg* Epg::FindEpgForChannel(const std::string& id) const
448449
{
449450
for (auto& myChannelEpg : m_channelEpgs)
450451
{
451-
if (TvgIdMatchesCaseOrNoCase(myChannelEpg.GetId(), id, m_settings->IgnoreCaseForEpgChannelIds()))
452+
if (m_settings->IgnoreCaseForEpgChannelIds())
453+
{
454+
if (StringUtils::EqualsNoCase(myChannelEpg.GetId(), id))
455+
return const_cast<ChannelEpg*>(&myChannelEpg);
456+
}
457+
else if (myChannelEpg.GetId() == id)
458+
{
452459
return const_cast<ChannelEpg*>(&myChannelEpg);
460+
}
453461
}
454462

455463
return nullptr;
456464
}
457465

458466
ChannelEpg* Epg::FindEpgForChannel(const Channel& channel) const
459467
{
460-
for (auto& myChannelEpg : m_channelEpgs)
461-
{
462-
if (TvgIdMatchesCaseOrNoCase(myChannelEpg.GetId(), channel.GetTvgId(), m_settings->IgnoreCaseForEpgChannelIds()))
463-
return const_cast<ChannelEpg*>(&myChannelEpg);
464-
}
468+
double maxSimilarity = 0.0;
469+
ChannelEpg* bestMatch = nullptr;
465470

466471
for (auto& myChannelEpg : m_channelEpgs)
467472
{
468-
for (const DisplayNamePair& displayNamePair : myChannelEpg.GetDisplayNames())
473+
double similarity = utilities::StringComparer::SorensenDiceSimilarity(
474+
myChannelEpg.GetId(), channel.GetTvgId(), m_settings);
475+
476+
if (similarity > maxSimilarity)
469477
{
470-
if (StringUtils::EqualsNoCase(displayNamePair.m_displayNameWithUnderscores, channel.GetTvgName()) ||
471-
StringUtils::EqualsNoCase(displayNamePair.m_displayName, channel.GetTvgName()))
472-
return const_cast<ChannelEpg*>(&myChannelEpg);
478+
maxSimilarity = similarity;
479+
bestMatch = const_cast<ChannelEpg*>(&myChannelEpg);
480+
481+
if (maxSimilarity == 1.0)
482+
break;
473483
}
474484
}
485+
486+
if (bestMatch != nullptr)
487+
return bestMatch;
475488

476489
for (auto& myChannelEpg : m_channelEpgs)
477490
{
478491
for (const DisplayNamePair& displayNamePair : myChannelEpg.GetDisplayNames())
479492
{
480-
if (StringUtils::EqualsNoCase(displayNamePair.m_displayName, channel.GetChannelName()))
481-
return const_cast<ChannelEpg*>(&myChannelEpg);
493+
double similarity = utilities::StringComparer::SorensenDiceSimilarity(
494+
displayNamePair.m_displayName, channel.GetTvgName(), m_settings);
495+
496+
if (similarity == 0.0)
497+
similarity = utilities::StringComparer::SorensenDiceSimilarity(
498+
displayNamePair.m_displayNameWithUnderscores, channel.GetTvgName(), m_settings);
499+
500+
if (similarity == 0.0)
501+
similarity = utilities::StringComparer::SorensenDiceSimilarity(
502+
displayNamePair.m_displayName, channel.GetChannelName(), m_settings);
503+
504+
if (similarity > maxSimilarity)
505+
{
506+
maxSimilarity = similarity;
507+
bestMatch = const_cast<ChannelEpg*>(&myChannelEpg);
508+
509+
if (maxSimilarity == 1.0)
510+
break;
511+
}
482512
}
513+
if (maxSimilarity == 1.0)
514+
break;
483515
}
484516

485-
return nullptr;
517+
return bestMatch;
486518
}
487519

488520
ChannelEpg* Epg::FindEpgForMediaEntry(const MediaEntry& mediaEntry) const
489521
{
490522
for (auto& myChannelEpg : m_channelEpgs)
491523
{
492-
if (TvgIdMatchesCaseOrNoCase(myChannelEpg.GetId(), mediaEntry.GetTvgId(), m_settings->IgnoreCaseForEpgChannelIds()))
524+
if (m_settings->IgnoreCaseForEpgChannelIds())
525+
{
526+
if (StringUtils::EqualsNoCase(myChannelEpg.GetId(), mediaEntry.GetTvgId()))
527+
return const_cast<ChannelEpg*>(&myChannelEpg);
528+
}
529+
else if (myChannelEpg.GetId() == mediaEntry.GetTvgId())
530+
{
493531
return const_cast<ChannelEpg*>(&myChannelEpg);
532+
}
494533
}
495534

496535
for (auto& myChannelEpg : m_channelEpgs)
497536
{
498537
for (const DisplayNamePair& displayNamePair : myChannelEpg.GetDisplayNames())
499538
{
500-
if (StringUtils::EqualsNoCase(displayNamePair.m_displayNameWithUnderscores, mediaEntry.GetTvgName()) ||
539+
if (StringUtils::EqualsNoCase(displayNamePair.m_displayNameWithUnderscores,
540+
mediaEntry.GetTvgName()) ||
501541
StringUtils::EqualsNoCase(displayNamePair.m_displayName, mediaEntry.GetTvgName()))
502542
return const_cast<ChannelEpg*>(&myChannelEpg);
503543
}
@@ -511,7 +551,7 @@ ChannelEpg* Epg::FindEpgForMediaEntry(const MediaEntry& mediaEntry) const
511551
if (StringUtils::EqualsNoCase(displayNamePair.m_displayName, mediaEntry.GetM3UName()))
512552
return const_cast<ChannelEpg*>(&myChannelEpg);
513553
}
514-
}
554+
}
515555

516556
return nullptr;
517557
}
@@ -522,7 +562,7 @@ void Epg::ApplyChannelsLogosFromEPG()
522562

523563
for (const auto& channel : m_channels.GetChannelsList())
524564
{
525-
const ChannelEpg* channelEpg = FindEpgForChannel(channel);
565+
const ChannelEpg* channelEpg = FindEpgForChannel(channel.GetTvgName());
526566
if (!channelEpg || channelEpg->GetIconPath().empty())
527567
continue;
528568

src/iptvsimple/InstanceSettings.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ void InstanceSettings::ReadSettings()
105105
m_instance.CheckInstanceSettingFloat("epgTimeShift", m_epgTimeShiftHours);
106106
m_instance.CheckInstanceSettingBoolean("epgTSOverride", m_tsOverride);
107107
m_instance.CheckInstanceSettingBoolean("epgIgnoreCaseForChannelIds", m_ignoreCaseForEpgChannelIds);
108+
m_instance.CheckInstanceSettingInt("epgChannelNameMatchThreshold", m_epgChannelNameMatchThreshold);
108109

109110
//Genres
110111
m_instance.CheckInstanceSettingBoolean("useEpgGenreText", m_useEpgGenreTextWhenMapping);
@@ -258,6 +259,8 @@ ADDON_STATUS InstanceSettings::SetSetting(const std::string& settingName, const
258259
return SetSetting<bool, ADDON_STATUS>(settingName, settingValue, m_tsOverride, ADDON_STATUS_OK, ADDON_STATUS_OK);
259260
else if (settingName == "epgIgnoreCaseForChannelIds")
260261
return SetSetting<bool, ADDON_STATUS>(settingName, settingValue, m_ignoreCaseForEpgChannelIds, ADDON_STATUS_OK, ADDON_STATUS_OK);
262+
else if (settingName == "epgChannelNameMatchThreshold")
263+
return SetSetting<int, ADDON_STATUS>(settingName, settingValue, m_epgChannelNameMatchThreshold, ADDON_STATUS_OK, ADDON_STATUS_OK);
261264
// Genres
262265
else if (settingName == "useEpgGenreText")
263266
return SetSetting<bool, ADDON_STATUS>(settingName, settingValue, m_useEpgGenreTextWhenMapping, ADDON_STATUS_OK, ADDON_STATUS_OK);

0 commit comments

Comments
 (0)