From 9354af540ed2c112c3031f028bc3bca14fa0b62c Mon Sep 17 00:00:00 2001 From: Konrad Dzwinel Date: Sun, 11 Jan 2015 23:31:14 +0100 Subject: [PATCH] - Settings file introduced - Main module separated from everything else - "show correct paragraphs" option removed - custom dictionary removed --- bin/cmd.js | 70 ++++++++++++++++++++ bin/proofreader.js.sh | 146 ------------------------------------------ lib/proofreader.js | 108 +++++++++++++++++++++++++++++++ package.json | 4 +- settings.json | 10 +++ 5 files changed, 190 insertions(+), 148 deletions(-) create mode 100755 bin/cmd.js delete mode 100755 bin/proofreader.js.sh create mode 100644 lib/proofreader.js create mode 100644 settings.json diff --git a/bin/cmd.js b/bin/cmd.js new file mode 100755 index 0000000..5a5687e --- /dev/null +++ b/bin/cmd.js @@ -0,0 +1,70 @@ +#!/usr/bin/env node + +//Helpers +var fs = require('fs'); +var request = require('request'); +var program = require('commander'); +var mime = require('mime'); +var marked = require('marked'); +var Promise = require('promise'); +var config = require('../settings.json'); +var Proofreader = require('../lib/proofreader.js'); + +program + .option('-u, --url [url]', 'URL to website that should be proofread.') + .option('-f, --file [path]', 'Path to HTML file that should be proofread.') + .option('-l, --file-list [path]', 'Path to a list of files that should be proofread.') + .option('-c, --config-file', 'Path to a configuration file') + .parse(process.argv); + +if(program.configFile) { + config = require(program.configFile); +} + +var proofreader = new Proofreader(config); + +function toHTML(path, content) { + var mimeType = mime.lookup(path); + + if(mimeType === 'text/x-markdown') { + return marked(content); + } + + return content; +} + +var resultPromise = null; + +if(program.url) { + request({uri: program.url}, function (err, response, body) { + if (err) { + throw err; + } + + resultPromise = proofreader.proofread(toHTML(program.url, body)); + }); +} else if(program.file) { + var content = fs.readFileSync(program.file).toString(); + + resultPromise = proofreader.proofread(toHTML(program.file, content)); +} else if(program.fileList) { + var listOfFiles = fs.readFileSync(program.fileList).toString().split("\n"); + var promises = []; + + listOfFiles.forEach(function(filePath) { + if(filePath) { + var content = fs.readFileSync(filePath).toString(); + var promise = proofreader.proofread(toHTML(filePath, content)); + + promises.push(promise); + } + }); + + resultPromise = Promise.all(promises); +} + +if(resultPromise) { + resultPromise.then(null, function() { + process.exit(1); + }) +} \ No newline at end of file diff --git a/bin/proofreader.js.sh b/bin/proofreader.js.sh deleted file mode 100755 index 5cb7752..0000000 --- a/bin/proofreader.js.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env node - -//Helpers -var fs = require('fs'); -var request = require('request'); -var clc = require('cli-color'); -var Sync = require('sync'); -var cheerio = require('cheerio'); -var program = require('commander'); -var mime = require('mime'); -var marked = require('marked'); -var path = require('path'); -var Promise = require('promise'); - -program - .option('-u, --url [url]', 'URL to website that should be proofread.') - .option('-f, --file [path]', 'Path to HTML file that should be proofread.') - .option('-l, --file-list [path]', 'Path to a list of files that should be proofread.') - .option('-c, --include-correct', 'Include correct paragraphs in the output.') - .parse(process.argv); - -//Hemingway-app-like suggestions -var writeGood = require('write-good'); - -//Spelling dictionary setup -var spellcheck = require('nodehun-sentences'); -var nodehun = require('nodehun'); -//TODO move to settings -var dict = new nodehun( - fs.readFileSync(path.join(__dirname, '../dictionaries/en_US.aff')), - fs.readFileSync(path.join(__dirname, '../dictionaries/en_US.dic')) -); - -dict.addDictionary(fs.readFileSync(path.join(__dirname, '../dictionaries/en_GB.dic'))); - -//TODO move to settings -var customDict = ['minifier', 'minifying', 'minified', 'DevTools', 'breakpoint', 'breakpoints', 'unminified', 'evals', 'evaled', 'debuggable', 'uncaught', 'protip', 'subtree', 'blackboxing', 'blackbox', 'blackboxed', 'async', 'callback', 'callbacks', 'CoffeeScript', 'JavaScript', 'CSS', 'HTML5', 'app', 'apps', 'checkbox', 'checkboxes', 'timeline', 'V8', 'Cmd', 'Ctrl', 'workflow', 'workflows', 'localhost', 'JSON', 'subfolder', 'webpage', 'XHR', 'SQL', 'WebKit', 'AppCache', 'SDK', 'WebView', 'plugin', 'ADB', 'USB', 'MAMP', 'IP', 'omnibox', 'screencast', 'Wi-Fi', 'Sass', 'KitKat', 'WebViews', 'screencasting', 'API', 'IDE', 'WebSocket', 'WebSockets', 'VM', 'GC', 'iframe', 'iframes', 'inline', 'sourcemaps', 'sourcemap', 'wiki', 'Esc', 'F1', 'F2', 'F5', 'F6', 'F8', 'F10', 'F11', 'F12', 'hostname', 'WebGL', 'iOS', 'MathML', 'UA', 'GPU', 'UI', 'geolocation', 'GPS', 'viewport', 'stylesheet', 'stylesheets', 'dpi', 'iPhone', 'PageUp', 'PageDown', 'W3C', 'SCSS', 'RGB', 'HSL', 'XPath', 'blog', 'GitHub', 'NodeJS', 'WebStorm', 'JetBrains', 'WebDriver', 'screenshot', 'screenshots', 'RSS', 'UX', 'codebase', 'IRC', 'fallback', 'inspectable', 'dropdown', 'IndexedDB', 'WebSQL', 'jQuery', 'timeline', 'timelines', 'jank', 'HAR', 'TTFB', 'DNS', 'XMLHttpRequest', 'XMLHttpRequests', 'polyfill', 'SSL', 'DOM']; - -customDict.forEach(function(word) { - dict.addWord(word); -}); - -function toHTML(path, content) { - var mimeType = mime.lookup(path); - - if(mimeType === 'text/x-markdown') { - return marked(content); - } - - return content; -} - -function proofread(html) { - var $ = cheerio.load(html); - - return new Promise(function(resolve, reject) { - Sync(function () { - var suggestionsCount = 0; - - //Blacklist tags with code - //TODO move to settings - $('pre, code').remove(); - - //Whitelist tags that should be processed - //TODO move to settings - $('p, li, h1, h2, h3, h4, th, td, dl, figcaption').each(function () { - var text = $(this).text(); - - //remove linebreaks from text - text = text.replace(/(\r\n|\n|\r)+/gm," "); - - //replace ’ with ' - text = text.replace(/’/g, "'"); - - if(text.trim().length) { - var writeGoodSuggestions = writeGood(text); - var spellingSuggestions = spellcheck.sync(null, dict, text); - - //Printing output - if(writeGoodSuggestions.length || spellingSuggestions.length) { - console.log(clc.red(text)); - - writeGoodSuggestions.forEach(function(item) { - console.log(clc.blue.bold(' - ' + item.reason)); - }); - - spellingSuggestions.forEach(function(item) { - console.log(clc.magenta.bold(' - "' + item.word + '" -> ' + item.suggestions)); - }); - - console.log(); - } else if (program.includeCorrect) { - console.log(clc.green(text)); - console.log(); - } - - suggestionsCount += writeGoodSuggestions.length + spellingSuggestions.length; - } - }); - - return suggestionsCount; - }, function (err, result) { - if(err || result !== 0) { - reject(); - } else { - resolve(); - } - }); - }); -} - -var resultPromise = null; - -if(program.url) { - request({uri: program.url}, function (err, response, body) { - if (err) { - throw err; - } - - resultPromise = proofread(toHTML(program.url, body)); - }); -} else if(program.file) { - var content = fs.readFileSync(program.file).toString(); - - resultPromise = proofread(toHTML(program.file, content)); -} else if(program.fileList) { - var listOfFiles = fs.readFileSync(program.fileList).toString().split("\n"); - var promises = []; - - listOfFiles.forEach(function(filePath) { - if(filePath) { - var content = fs.readFileSync(filePath).toString(); - var promise = proofread(toHTML(filePath, content)); - - promises.push(promise); - } - }); - - resultPromise = Promise.all(promises); -} - -if(resultPromise) { - resultPromise.then(null, function() { - process.exit(1); - }) -} \ No newline at end of file diff --git a/lib/proofreader.js b/lib/proofreader.js new file mode 100644 index 0000000..f7c4b48 --- /dev/null +++ b/lib/proofreader.js @@ -0,0 +1,108 @@ +var path = require('path'); +var clc = require('cli-color'); +var Sync = require('sync'); +var cheerio = require('cheerio'); +var Promise = require('promise'); +var fs = require('fs'); +var writeGood = require('write-good'); +var nodehun = require('nodehun'); +var spellcheck = require('nodehun-sentences'); + +module.exports = Proofreader; + +function Proofreader(settings) { + if(!settings) { + throw new Error('Settings object missing.'); + } else if(!settings.dictionaries['build-in'] || !settings.dictionaries['build-in'].length) { + throw new Error('At least one build-in dictionary has to be set.'); + } else if(!settings.selectors || !settings.selectors.whitelist) { + throw new Error('Whitelist has to be set.'); + } + + this._settings = settings; + this._setupDictionaries(settings.dictionaries); +} + +Proofreader.prototype._setupDictionaries = function(dictionaries) { + var mainDictName = dictionaries['build-in'][0]; + dictionaries['build-in'].shift(); + + //main dictionary + var dict = new nodehun( + fs.readFileSync(path.join(__dirname, '../dictionaries/' + mainDictName + '.aff')), + fs.readFileSync(path.join(__dirname, '../dictionaries/' + mainDictName + '.dic')) + ); + + //other build-in dictionaries + dictionaries['build-in'].forEach(function(dictName) { + dict.addDictionary(path.join(__dirname, '../dictionaries/' + dictName + '.dic')); + }); + + //other custom dictionaries + if(dictionaries['custom']) { + dictionaries['custom'].forEach(function(dictPath) { + dict.addDictionary(fs.readFileSync(dictPath)); + }); + } + + this._dictionary = dict; +}; + +Proofreader.prototype.proofread = function(html) { + var $ = cheerio.load(html); + var dictionary = this._dictionary; + var whitelist = this._settings.selectors.whitelist; + var blacklist = this._settings.selectors.blacklist; + + return new Promise(function(resolve, reject) { + Sync(function () { + var suggestionsCount = 0; + + //Blacklisted elements are removed before text is processed + if(blacklist) { + $(blacklist).remove(); + } + + //Only whitelisted elements are processed + $(whitelist).each(function () { + var text = $(this).text(); + + //remove linebreaks from text + text = text.replace(/(\r\n|\n|\r)+/gm," "); + + //replace ’ with ' + text = text.replace(/’/g, "'"); + + if(text.trim().length) { + var writeGoodSuggestions = writeGood(text); + var spellingSuggestions = spellcheck.sync(null, dictionary, text); + + //Printing output + if(writeGoodSuggestions.length || spellingSuggestions.length) { + console.log(clc.red(text)); + + writeGoodSuggestions.forEach(function(item) { + console.log(clc.blue.bold(' - ' + item.reason)); + }); + + spellingSuggestions.forEach(function(item) { + console.log(clc.magenta.bold(' - "' + item.word + '" -> ' + item.suggestions)); + }); + + console.log(); + } + + suggestionsCount += writeGoodSuggestions.length + spellingSuggestions.length; + } + }); + + return suggestionsCount; + }, function (err, result) { + if(err || result !== 0) { + reject(); + } else { + resolve(); + } + }); + }); +}; \ No newline at end of file diff --git a/package.json b/package.json index ac6facc..29189c0 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "proofreader", "description": "Simple text proofreader based on 'write-good' (hemingway-app-like suggestions) and 'nodehun' (spelling).", - "version": "0.4.0", + "version": "0.5.0", "repository": { "type": "git", "url": "git://github.com/kdzwinel/Proofreader.git" @@ -16,7 +16,7 @@ } ], "bin": { - "proofreader": "./bin/proofreader.js.sh" + "proofreader": "./bin/cmd.js" }, "dependencies": { "cheerio": "^0.18.0", diff --git a/settings.json b/settings.json new file mode 100644 index 0000000..af6afba --- /dev/null +++ b/settings.json @@ -0,0 +1,10 @@ +{ + "dictionaries": { + "build-in": ["en_US"], + "custom": [] + }, + "selectors": { + "whitelist": "p, li, h1, h2, h3, h4, th, td, dl, figcaption", + "blacklist": "pre, code" + } +} \ No newline at end of file