Skip to content

Commit

Permalink
Update to 'relevancy'
Browse files Browse the repository at this point in the history
  • Loading branch information
padolsey committed Nov 20, 2011
1 parent 769ec8e commit 246d1af
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 51 deletions.
6 changes: 3 additions & 3 deletions demos/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<html>
<head>
<meta charset="UTF-8" />
<title>Similarity Sorter Test suite</title>
<title>relevancy Sorter Test suite</title>
</head>
<body>

Expand All @@ -16,15 +16,15 @@
</div>

<script src="../test/data.js"></script>
<script src="../lib/similarity.js"></script>
<script src="../lib/relevancy.js"></script>

<script>

var dataSelect = document.getElementById('dataset'),
search = document.getElementById('search'),
data = document.getElementById('data'),

sorter = similarity.Sorter({}, countries);
sorter = relevancy.Sorter({}, countries);

search.onkeyup = function() {

Expand Down
16 changes: 8 additions & 8 deletions lib/similarity.js → lib/relevancy.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
/**
* Similarity.js v0.1.0dev
* relevancy.js v0.1.0dev
**/

(function(){

var S = typeof module != 'undefined' && module && module.exports || (this.similarity = {}),
var S = typeof module != 'undefined' && module && module.exports || (this.relevancy = {}),
toString = Object.prototype.toString,
hasOwn = Object.prototype.hasOwnProperty;

S.sort = function similaritySort(array, subject) {
S.sort = function relevancySort(array, subject) {
return S.defaultSorter.sort(array, subject);
};

S.weight = function similarityWeight(a, b) {
S.weight = function relevancyWeight(a, b) {
return S.defaultSorter._calcWeight(b, S.defaultSorter._generateSubjectRegex(a), a);
};

Expand Down Expand Up @@ -92,7 +92,7 @@
_generateBoundRegex: function() {

this.boundRegex = RegExp(this._bounds.join('|'));
this.lastBoundRegex = RegExp('.+(?:' + this._bounds.join('|') + ')(?=.+$)');
this.lastBoundRegex = RegExp('.+(?:' + this._bounds.join('|') + ')(?=.*$)');

},
setArray: function(array) {
Expand Down Expand Up @@ -153,12 +153,12 @@

if (!match) return null;

var upTillAndInclMatch = value.slice(0, match.index + match.length),
lastBoundIndex = (upTillAndInclMatch.match(this.lastBoundRegex)||[''])[0].length,
var upTillMatch = value.slice(0, match.index),
lastBoundIndex = (upTillMatch.match(this.lastBoundRegex)||[''])[0].length,

matchInValueIndexScore = (1 - (
((match.index - lastBoundIndex) / (value.length - lastBoundIndex))
)) - ((upTillAndInclMatch.split(this.boundRegex).length-1) * .05),
)) - ((upTillMatch.split(this.boundRegex).length-1) * .05),

matchInValueLengthScore = match.length / value.length,

Expand Down
10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
{
"name": "similarity",
"description": "Unidirectional string similarity",
"name": "relevancy",
"description": "Unidirectional string relevancy",
"version": "0.1.0",
"author": "James Padolsey https://github.com/jamespadolsey",
"bugs" : {
"url" : "http://github.com/jamespadolsey/similarity.js/issues"
"url" : "http://github.com/jamespadolsey/relevancy.js/issues"
},
"repository": {
"type": "git",
"url": "http://jamespadolsey@github.com/jamespadolsey/similarity.js.git"
"url": "http://jamespadolsey@github.com/jamespadolsey/relevancy.js.git"
},
"main": "lib/similarity",
"main": "lib/relevancy",
"licenses": [],
"contributors": [
"James Padolsey <> (http://github.com/jamespadolsey)"
Expand Down
30 changes: 15 additions & 15 deletions readme.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Similarity Sorter/Weighter
# Relevancy Sorter/Weighter

## Objective

The original purpose behind similarity.js was to sort an array of items based on each element's similarity to a single string. This is exactly what autocompletion widgets/scripts claim to do, but many of them suck for the following reasons:
The original purpose behind relevancy.js was to sort an array of items based on each element's relevancy to a single string. This is exactly what autocompletion widgets/scripts claim to do, but many of them suck for the following reasons:

* They only take into account partial matches at the beginning of the strings. If I type "nited kingdom" (missing a "u") nothing will show up.
* They don't actually sort the results dependent on similarity -- they just show you where the partial matches have been found, in alphabetical order usually. Match length/position is rarely considered.
* They don't actually sort the results by relevancy -- they just show you where the partial matches have been found, usually in alphabetical order. Match length/position is rarely considered.

E.g.

Expand All @@ -15,31 +15,31 @@ E.g.

The basic partial-matching implemented by most autocompletion scripts can hamper usability.

Hopefully, similarity.js can rectify this with its not-so-complex weighting algorithm.
Hopefully, relevancy.js can rectify this with its not-so-complex weighting algorithm.

## Intro

**[Version: 0.1.0dev]**

similarity.js contains a basic sorting/weighting algorithm that can be used to weight a short string relative to another short string. It can gage the similarity between two strings, but only in a unidirectional manner (`"Lon"` is more *similar* to `"London"` than `"London"` is to `"Lon"`). This was intentional as its main use-case is autocompletion -- i.e. matching partial typed words against large data lists.
relevancy.js contains a basic sorting/weighting algorithm that can be used to weight a short string relative to another short string. It can gauge the relevancy between two strings, but only in a unidirectional manner (`"Lon"` is more *relevant* to `"London"` than `"London"` is to `"Lon"`). This was intentional as its main use-case is autocompletion -- i.e. matching partially typed words against large data lists.

similarity.weight('Ame', 'America') > similarity.weight('Ame', 'Armenia'); // => true
relevancy.weight('Ame', 'America') > relevancy.weight('Ame', 'Armenia'); // => true

// Explanation:
// "Ame" has a higher similarity weighting to "America" than to "Armenia"
// "Ame" has a higher relevancy weighting to "America" than to "Armenia"

The *subject* of a single weighting or sorting operation is the string that is being compared against the target string or array. For example:

var subject = 'G';
var array = ['Apple', 'Banana', 'Grape', 'Mango'];

similarity.sort(array, subject); // => ['Grape', 'Mango', 'Apple', 'Banana']
relevancy.sort(array, subject); // => ['Grape', 'Mango', 'Apple', 'Banana']

The elements have been sorted by their similarity to the subject "G", taking the following weights into account:
The elements have been sorted by their relevancy to the subject "G", taking the following weights into account:

* `matchInSubjectLength`: (`0..1`) The proportion of the largest substring match found within the actual subject. So, if the subject is `"Gu"`, but we only match `"G"` (e.g. in `"Grape"`) then proportion would be `0.5`.
* `matchInSubjectIndex`: (`0..1`) The proximity of the match to the start of the subject. For example, given the subject `"A grape"` against the value `"Grapelicious"`, `"grape"` in `"A grape"` matches the beginning of the value, and it is at an index of `2`. The highest possible index of this is `2` (given the match's length: `5`) so `matchInSubjectIndex`, in this example, gets set as zero.
* `matchInValueLength`: (`0..1`) The proportion of the largest substring match found within the target value. For example, we matched the substring `"King"` (length:`4`) against the value `"United Kingdom"` (length:`14`), and `4/14 = ~0.29` **This is not used currently.**, although can be set when you pass a configuration object to `similarity.Sorter`.
* `matchInValueLength`: (`0..1`) The proportion of the largest substring match found within the target value. For example, we matched the substring `"King"` (length:`4`) against the value `"United Kingdom"` (length:`14`), and `4/14 = ~0.29`. **This is not currently used**, although it can be set when you pass a configuration object to `relevancy.Sorter`.
* `matchInValueIndex`: (`0..1`) The proximity of the match to the start of the target value. E.g., a match of `"dom"` in `"Kingdom"` -- `"dom"` is matched at an index of `4`. `4` divided by the total length of `7`, subtracted from `1`, gives us our `matchInValueIndex` of `~0.43`.

Note that the algorithm will account for spaces and anchor the calculations above accordingly. So `"Banana Milkshake"` will be weighted higher than `"bananamilk"` given the subject `"milk"` because it begins a word in the former `"Banana Milkshake"` but merely forms a part of another word in the latter, `"bananamilk"`.
Expand All @@ -48,7 +48,7 @@ The default bound of `\s+` is used to find where the calculations should be anch

## Example Implementation

// In this example, we'll also see how similarity.js can
// In this example, we'll also see how relevancy.js can
// deal with nested arrays. Default operation is `max`,
// meaning that it'll get the maximum weight from each sub-array
// and use that for comparing to other sub-arrays.
Expand All @@ -63,20 +63,20 @@ The default bound of `\s+` is used to find where the calculations should be anch
// .......
];

var countrySorter = similarity.Sorter(null, countries);
var countrySorter = relevancy.Sorter(null, countries);

countrySorter.sortBy('Al').slice(0, 5); // => [["AL", "Albania"], ["DZ", "Algeria"]...]

## Configuration

If you want more control you should create a `similarity.Sorter` instance which can accept a configuration object upon instantiation:
If you want more control you should create a `relevancy.Sorter` instance which can accept a configuration object upon instantiation:

var mySorter = new similarity.Sorter({
var mySorter = new relevancy.Sorter({

bounds: ['\\s', '(?=[A-Z])', '-'], // create new bounds (default: ['\\s'])

comparator: function(a, b) {
// When similarity.Sorter finds two items with equal weight
// When relevancy.Sorter finds two items with equal weight
// it will pass them to this function so you can decide
// what to do. I.e. return -1, 1
// Only return 0 if you're prepared for the pain caused
Expand Down
6 changes: 3 additions & 3 deletions test/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
<html>
<head>
<meta charset="UTF-8" />
<title>Similarity Sorter Test suite</title>
<title>Relevancy Sorter Test suite</title>
<link rel="stylesheet" href="qunit.css" type="text/css" media="screen">
<script src="qunit.js"></script>
<script src="data.js"></script>
<script src="test.js"></script>
<script src="../lib/similarity.js"></script>
<script src="../lib/relevancy.js"></script>
</head>
<body>
<h1 id="qunit-header">Similarity Sorter Test suite</h1>
<h1 id="qunit-header">Relevancy Sorter Test suite</h1>
<h2 id="qunit-banner"></h2>
<div id="qunit-testrunner-toolbar"></div>
<h2 id="qunit-userAgent"></h2>
Expand Down
34 changes: 17 additions & 17 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module('101', {
test('Basic first-character ordering', function(){

var unsorted = ['Paul', 'Michael'],
sorted = similarity.sort(unsorted.slice(), 'M');
sorted = relevancy.sort(unsorted.slice(), 'M');

deepEqual(sorted, ['Michael', 'Paul']);

Expand All @@ -16,7 +16,7 @@ test('Basic first-character ordering', function(){
test('No matching items', function() {

var unsorted = ['aaa', 'bbb', 'ccc'],
sorted = similarity.sort(unsorted.slice(), 'x');
sorted = relevancy.sort(unsorted.slice(), 'x');

deepEqual(sorted, unsorted);

Expand All @@ -25,7 +25,7 @@ test('No matching items', function() {
test('Some matching items', function(){

var unsorted = ['Sarah', 'Julie', 'Michael', 'Paul', 'Amanada'],
sorted = similarity.sort(unsorted.slice(), 'M');
sorted = relevancy.sort(unsorted.slice(), 'M');

deepEqual(sorted, ['Michael', 'Amanada', 'Sarah', 'Julie', 'Paul']);

Expand All @@ -34,7 +34,7 @@ test('Some matching items', function(){
test('Items containing matches', function(){

var unsorted = ['AAAA', 'ABBA', 'CCCC', 'CBBC'],
sorted = similarity.sort(unsorted.slice(), 'BB');
sorted = relevancy.sort(unsorted.slice(), 'BB');

deepEqual(sorted, ['ABBA', 'CBBC', 'AAAA', 'CCCC']);

Expand All @@ -43,7 +43,7 @@ test('Items containing matches', function(){
test('Distance-from-start', function(){

var unsorted = ['..a', '.a', '....a', 'a', '...a'],
sorted = similarity.sort(unsorted.slice(), 'a');
sorted = relevancy.sort(unsorted.slice(), 'a');

deepEqual(sorted, ['a', '.a', '..a', '...a', '....a']);

Expand Down Expand Up @@ -74,7 +74,7 @@ test('Basic names - ^Ja', function(){
'Jan',
'George'
],
sorted = similarity.sort(unsorted.slice(), 'Ja');
sorted = relevancy.sort(unsorted.slice(), 'Ja');

deepEqual(
sorted.slice(0, 5),
Expand All @@ -101,7 +101,7 @@ test('Full names', function(){
'Bob',
'Sarah Smith'
],
sorted = similarity.sort(unsorted.slice(), 'Sm');
sorted = relevancy.sort(unsorted.slice(), 'Sm');

deepEqual(
sorted.slice(0, 5),
Expand All @@ -118,24 +118,24 @@ test('Full names', function(){

test('Countries - single full', function(){

deepEqual(similarity.sort(countries, 'GB')[0], ['GB', 'United Kingdom']);
deepEqual(similarity.sort(countries, 'United States')[0], ['US', 'United States']);
deepEqual(similarity.sort(countries, 'Saint Lucia')[0], ['LC', 'Saint Lucia']);
deepEqual(similarity.sort(countries, 'CU')[0], ['CU', 'Cuba']);
deepEqual(relevancy.sort(countries, 'GB')[0], ['GB', 'United Kingdom']);
deepEqual(relevancy.sort(countries, 'United States')[0], ['US', 'United States']);
deepEqual(relevancy.sort(countries, 'Saint Lucia')[0], ['LC', 'Saint Lucia']);
deepEqual(relevancy.sort(countries, 'CU')[0], ['CU', 'Cuba']);

});

test('Countries - single partial', function(){

var sorted = similarity.sort(countries, 'Ukr');
var sorted = relevancy.sort(countries, 'Ukr');

deepEqual(sorted[0], ['UA', 'Ukraine']);

});

test('Countries - single partial - second word', function(){

var sorted = similarity.sort(countries, 'Poly');
var sorted = relevancy.sort(countries, 'Poly');

deepEqual(sorted[0], ['PF', 'French Polynesia']);

Expand All @@ -149,7 +149,7 @@ module('subArrayWeightOperations (max, min, avg, custom', {

test('max', function(){

var sorted = similarity.sort([
var sorted = relevancy.sort([
['b', 'c', 'a'],
['cccc', 'bbbb', 'cccc'],
['bb', 'aa', 'cc']
Expand All @@ -175,7 +175,7 @@ module('Misc. configs', {
test('Basic names - Custom secondary comparator (retain index)', function(){

// It's possible to specify the secondary comparator used when
// weights are found to be equal. similarity.js will attempt
// weights are found to be equal. relevancy.js will attempt
// to retain original positions, but some engines (V8!) don't
// have stable sorts... and so this kind of thing can become a necessity.

Expand All @@ -202,7 +202,7 @@ test('Basic names - Custom secondary comparator (retain index)', function(){
indexes[unsorted[i]] = i;
}

var sorted = similarity.Sorter({
var sorted = relevancy.Sorter({
comparator: function(a, b) {
return indexes[a] > indexes[b] ? 1 : -1;
}
Expand Down Expand Up @@ -234,7 +234,7 @@ test('Custom bound - camelCase', function(){
];

deepEqual(
similarity.Sorter({
relevancy.Sorter({
bounds: ['\\s', '(?=[A-Z])']
}).sort(array, 'script'),
[
Expand Down

0 comments on commit 246d1af

Please sign in to comment.