From 84a073e6751c06758e2283931db188abe1df99a7 Mon Sep 17 00:00:00 2001 From: Colin Tucker Date: Tue, 22 May 2018 14:57:04 +1000 Subject: [PATCH] Improved method of generating summary from content field. --- src/Extensions/Model/MetaDataExtension.php | 7 +- src/Tools/StringTools.php | 152 +++++++++++++++++++++ 2 files changed, 156 insertions(+), 3 deletions(-) create mode 100644 src/Tools/StringTools.php diff --git a/src/Extensions/Model/MetaDataExtension.php b/src/Extensions/Model/MetaDataExtension.php index 7ff89be..0e4ce08 100644 --- a/src/Extensions/Model/MetaDataExtension.php +++ b/src/Extensions/Model/MetaDataExtension.php @@ -31,6 +31,7 @@ use SilverWare\Forms\DimensionsField; use SilverWare\Forms\FieldSection; use SilverWare\Tools\ImageTools; +use SilverWare\Tools\StringTools; use SilverWare\Tools\ViewTools; use SilverWare\View\GridAware; @@ -599,7 +600,7 @@ public function getMetaSummary() } if ($content = $this->owner->getMetaContent()) { - return DBField::create_field('HTMLFragment', sprintf('

%s

', $this->getContentSummary($content))); + return DBField::create_field('HTMLFragment', sprintf('

%s

', $this->owner->getContentSummary($content))); } } @@ -1141,7 +1142,7 @@ public function getFieldFromParent($name) * * @return string */ - protected function getContentSummary(DBHTMLText $content) + public function getContentSummary(DBHTMLText $content) { switch ($this->owner->SummaryMode) { case self::SUMMARY_FIRST_SENTENCE: @@ -1149,7 +1150,7 @@ protected function getContentSummary(DBHTMLText $content) case self::SUMMARY_FIRST_PARAGRAPH: return $content->FirstParagraph(); default: - return $content->Summary(); + return StringTools::singleton()->getContentSummary($content); } } diff --git a/src/Tools/StringTools.php b/src/Tools/StringTools.php new file mode 100644 index 0000000..3f8dfa1 --- /dev/null +++ b/src/Tools/StringTools.php @@ -0,0 +1,152 @@ +=5.6.0 + * + * For full copyright and license information, please view the + * LICENSE.md file that was distributed with this source code. + * + * @package SilverWare\Tools + * @author Colin Tucker + * @copyright 2018 Praxis Interactive + * @license https://opensource.org/licenses/BSD-3-Clause BSD-3-Clause + * @link https://github.com/praxisnetau/silverware + */ + +namespace SilverWare\Tools; + +use SilverStripe\Core\Convert; +use SilverStripe\Core\Injector\Injectable; +use SilverStripe\ORM\FieldType\DBHTMLText; + +/** + * A singleton providing utility functions for use with strings. + * + * @package SilverWare\Tools + * @author Colin Tucker + * @copyright 2018 Praxis Interactive + * @license https://opensource.org/licenses/BSD-3-Clause BSD-3-Clause + * @link https://github.com/praxisnetau/silverware + */ +class StringTools +{ + use Injectable; + + /** + * Answers a summary of the given content field using an improved method of removing tags. 
+     *
+     * The content is flattened to plain text first. When the first sentence alone has more than
+     * $maxWords words, it is cut to $maxWords words and $add is appended. Otherwise the first sentence
+     * is always kept and further whole sentences are appended while the running word count stays
+     * below $maxWords; $add is not appended in that case.
+     *
+     * @param DBHTMLText $content  Content field to summarise.
+     * @param integer    $maxWords Word budget for the summary.
+     * @param string     $add      Suffix appended when the first sentence has to be cut.
+     *
+     * @return string Plain text summary, or an empty string when the plain text is empty (or falsy).
+     */
+    public function getContentSummary(DBHTMLText $content, $maxWords = 50, $add = '...')
+    {
+        // Get Plain Text Version:
+
+        $value = $this->getContentAsPlainText($content);
+
+        // Bail Early (if empty):
+
+        if (!$value) {
+            return '';
+        }
+
+        // Split on Sentences (do not remove period). A period only ends a sentence when whitespace
+        // follows it, so "3.14" or "example.com" are not split and re-joined with a stray space:
+
+        $parts = preg_split('@(?<=\.)\s+@', $value);
+
+        if ($parts === false) {
+            $parts = [$value];
+        }
+
+        // Trim Each Sentence, Drop Empty Ones and Reindex (so that index 0 always exists):
+
+        $sentences = array_values(array_filter(array_map('trim', $parts), 'strlen'));
+
+        if (!$sentences) {
+            return '';
+        }
+
+        // Count Words in First Sentence:
+
+        $words = $this->splitWords($sentences[0]);
+        $wordCount = count($words);
+
+        // If the First Sentence is Too Long, Show Only the First $maxWords Words
+        // (cut from the same word list that was counted):
+
+        if ($wordCount > $maxWords) {
+            return implode(' ', array_slice($words, 0, $maxWords)) . $add;
+        }
+
+        // Always Keep the First Sentence, Then Add Following Sentences (while under the word budget):
+
+        $result = array_shift($sentences);
+
+        foreach ($sentences as $sentence) {
+
+            // Count the Candidate Sentence Before Adding It:
+
+            $wordCount += count($this->splitWords($sentence));
+
+            if ($wordCount >= $maxWords) {
+                break;
+            }
+
+            $result .= ' ' . $sentence;
+
+        }
+
+        // Answer Result String:
+
+        return $result;
+    }
+
+    /**
+     * Splits the given string into words on runs of whitespace.
+     *
+     * @param string $string
+     *
+     * @return array List of words (empty when the string holds no words).
+     */
+    private function splitWords($string)
+    {
+        $words = preg_split('#\s+#u', $string, -1, PREG_SPLIT_NO_EMPTY);
+
+        // The "u" modifier makes preg_split() fail on invalid UTF-8; retry byte-wise instead of miscounting:
+
+        if ($words === false) {
+            $words = preg_split('#\s+#', $string, -1, PREG_SPLIT_NO_EMPTY);
+        }
+
+        return $words ?: [];
+    }
+
+    /**
+     * Converts the given content field to a plain text string using an improved method of removing tags.
+     *
+     * Line break and paragraph end tags are turned into newlines before the remaining tags are
+     * stripped, then HTML entities are decoded and the result is trimmed. Note that removeTags()
+     * replaces newlines with single spaces, so the markers end up as word separators.
+     *
+     * @param DBHTMLText $content
+     *
+     * @return string
+     */
+    public function getContentAsPlainText(DBHTMLText $content)
+    {
+        // Preserve Line Breaks (<br>, <br/>, <br />):
+
+        $text = preg_replace('/<br\s*\/?>/i', "\n", (string) $content->RAW());
+
+        // Convert Paragraph Breaks to Multi-Lines:
+
+        $text = preg_replace('/<\/p>/i', "\n\n", $text);
+
+        // Remove HTML Tags:
+
+        $text = $this->removeTags($text);
+
+        // Implode 2+ Consecutive Linebreaks into 2. The "u" modifier is required: in byte mode \R also
+        // matches the single byte 0x85, and pairs of that byte occur inside multibyte UTF-8 characters:
+
+        $collapsed = preg_replace('~\R{2,}~u', "\n\n", $text);
+
+        // Keep the Text Untouched if the Pattern Could Not Run (e.g. invalid UTF-8):
+
+        if ($collapsed !== null) {
+            $text = $collapsed;
+        }
+
+        // Decode HTML Entities Back to Plain Text:
+
+        return trim(Convert::xml2raw($text));
+    }
+
+    /**
+     * Removes HTML tags from the given string while maintaining whitespace.
+     *
+     * Each tag is swapped for a single space so that words from adjacent elements do not run
+     * together. Carriage returns are then dropped, newlines and tabs become spaces, and runs of
+     * spaces are squeezed to one before the result is trimmed.
+     *
+     * @param string $string Markup to strip.
+     *
+     * @return string Text with the tags removed.
+     */
+    public function removeTags($string)
+    {
+        // Swap Each Tag for a Space:
+
+        $stripped = preg_replace('/<[^>]*>/', ' ', $string);
+
+        // Drop Carriage Returns, Turn Newlines and Tabs into Spaces (applied in this order):
+
+        $flattened = str_replace(["\r", "\n", "\t"], ['', ' ', ' '], $stripped);
+
+        // Squeeze Repeated Spaces and Trim Both Ends:
+
+        return trim(preg_replace('/ {2,}/', ' ', $flattened));
+    }
+}