Skip to content

Commit

Permalink
Merge branch 'search-operators' into stable
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmz committed Aug 31, 2024
2 parents 7d56144 + e2eb8e8 commit 46dae18
Show file tree
Hide file tree
Showing 8 changed files with 185 additions and 16 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [2.23.0] - Not released
### Added
- The new `is:` search operator
### Changed
- The `has` search operator now accepts file extensions (as in `has:pdf` or
`has:.pdf`)

## [2.22.0] - 2024-08-30
### Added
Expand Down
70 changes: 62 additions & 8 deletions app/support/DbAdapter/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ const searchTrait = (superClass) =>
const fileTypesSQL = fileTypesFiltersSQL(parsedQuery, 'a');
const useFilesTable = isNonTrivialSQL(fileTypesSQL);

// Posts privacy flags
const postsPrivacySQL = privacyFiltersSQL(parsedQuery, 'p');

// Counters
const postCountersSQL = andJoin([
countersFiltersSQL(parsedQuery, 'comments', 'pc.comments_count'),
Expand Down Expand Up @@ -130,9 +133,9 @@ const searchTrait = (superClass) =>

if (useCommentsTable) {
const notBannedSQLFabric = await this.notBannedActionsSQLFabric(viewerId);
commentsRestrictionSQL = andJoin([
pgFormat('(c.hide_type is null or c.hide_type=%L)', Comment.VISIBLE),
notBannedSQLFabric('c'),
commentsRestrictionSQL = orJoin([
'c.id is null',
andJoin([pgFormat('c.hide_type=%L', Comment.VISIBLE), notBannedSQLFabric('c')]),
]);
}

Expand Down Expand Up @@ -183,6 +186,7 @@ const searchTrait = (superClass) =>
commentsRestrictionSQL,
postCountersSQL,
commentCountersSQL,
postsPrivacySQL,
]),
`group by p.uid, p.${sort}_at, p.id`,
`having ${andJoin([fileTypesSQL, cLikesSQL])}`,
Expand Down Expand Up @@ -428,6 +432,43 @@ function dateFiltersSQL(tokens, field, targetScope) {
return andJoin(result);
}

/**
 * Builds the SQL filter for the 'is:' search conditions (post privacy).
 *
 * Walks the parsed query and gathers the privacy words ('public', 'private',
 * 'protected') from every 'is' condition, keeping the words of regular and of
 * excluding ('-is:') conditions apart. The regular words are OR-joined, the
 * excluded ones are OR-joined and then negated, and both parts are AND-joined.
 *
 * @param {object[]} tokens - parsed query tokens, traversed with walkWithScope
 * @param {string} postsTable - name (alias) of the posts table in the query
 * @returns {string}
 */
function privacyFiltersSQL(tokens, postsTable) {
  const knownWords = ['public', 'private', 'protected'];
  // Both stay null until the first matching condition of that kind is met
  let included = null;
  let excluded = null;

  walkWithScope(tokens, (token) => {
    const isPrivacyCondition = token instanceof Condition && token.condition === 'is';

    if (!isPrivacyCondition) {
      return;
    }

    const matched = token.args.filter((arg) => knownWords.includes(arg));

    if (token.exclude) {
      excluded = excluded ? union(excluded, matched) : uniq(matched);
    } else {
      included = included ? union(included, matched) : uniq(matched);
    }
  });

  const includedSQL =
    included && orJoin(included.map((word) => privacySQLCondition(word, postsTable)));
  const excludedSQL =
    excluded && sqlNot(orJoin(excluded.map((word) => privacySQLCondition(word, postsTable))));

  return andJoin([includedSQL, excludedSQL]);
}

/**
 * Translates a single privacy word into an SQL condition over the boolean
 * 'is_protected' / 'is_private' columns of the given posts table.
 *
 * - 'public': the post is not protected;
 * - 'protected': the post is protected but not private;
 * - any other word (i.e. 'private'): the post is private.
 *
 * @param {'public' | 'private' | 'protected'} privacyWord
 * @param {string} postsTable
 * @returns {string}
 */
function privacySQLCondition(privacyWord, postsTable) {
  switch (privacyWord) {
    case 'public':
      return `not ${postsTable}.is_protected`;
    case 'protected':
      return `${postsTable}.is_protected and not ${postsTable}.is_private`;
    default:
      return `${postsTable}.is_private`;
  }
}

function countersFiltersSQL(tokens, condition, field) {
const result = [];

Expand All @@ -454,7 +495,7 @@ function intervalSQL(token, field) {
return 'false';
}

const validFileTypes = ['audio', 'image', 'general'];
const commonFileTypes = ['audio', 'image', 'general'];
/**
* Returns aggregated List of file types used in 'has:' conditions. Returns null
* if none of such conditions present.
Expand All @@ -476,8 +517,8 @@ function getFileTypes(tokens) {
// Select only the valid file types
const argTypes = token.args
// The 'file' type means 'audio, image, or general'
.flatMap((a) => (a === 'file' ? validFileTypes : a))
.filter((a) => validFileTypes.includes(a));
.flatMap((a) => (a === 'file' ? commonFileTypes : a))
.filter((a) => commonFileTypes.includes(a) || a.startsWith('.'));

if (!token.exclude) {
positive = positive ? union(positive, argTypes) : uniq(argTypes);
Expand All @@ -495,12 +536,25 @@ function getFileTypes(tokens) {
* @returns {string|null}
*/
function fileTypesToAggregate(types, attTable) {
if (types.length === validFileTypes.length) {
if (commonFileTypes.every((t) => types.includes(t))) {
// Any type is valid
return `bool_or(${attTable}.media_type is not null)`;
}

return `bool_or(${orJoin(types.map((t) => `${attTable}.media_type = '${t}'`))})`;
return `bool_or(${orJoin(
types.map((t) => {
if (t.startsWith('.')) {
// This is a file extension
return pgFormat(
`lower(reverse(split_part(reverse(${attTable}.file_name), '.', 1))) = %L`,
t.replace(/^\./, ''),
);
}
// This is a media type
return pgFormat(`${attTable}.media_type = %L`, t);
}),
)})`;
}

function fileTypesFiltersSQL(tokens, attTable) {
Expand Down
2 changes: 1 addition & 1 deletion app/support/DbAdapter/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ export function sqlNot(statement: string): string {
return 'true';
}

return `not ${statement}`;
return `not ${statement.includes(' ') ? `(${statement})` : statement}`;
}

// These helpers allow to use the IN operator with the empty list of values. 'IN
Expand Down
22 changes: 16 additions & 6 deletions app/support/search/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,17 +157,27 @@ export function parseQuery(query: string, { minPrefixLength }: ParseQueryOptions
return;
}

// has:images,audio
// has:images,audio, has:jpg or has:.jpg
if (groups.cond === 'has') {
const validWords = ['image', 'audio', 'file'];
const words = (groups.word as string)
.split(',')
.map((w) => w.replace(/s$/g, ''))
.filter((w) => validWords.includes(w));
// Split the comma-separated argument into media-type keywords and file
// extensions. The alternatives are grouped so that '^' and '$' apply to each
// of them: without the group, the pattern is read as
// (^images?) | (audios?) | (files?$), and words such as 'profiles' or
// 'audiobook' are mistaken for media-type keywords instead of extensions.
const words = (groups.word as string).split(',').map((w) => {
  if (/^(?:images?|audios?|files?)$/.test(w)) {
    // Keywords are stored in the singular form: 'images' -> 'image'
    return w.replace(/s$/, '');
  }

  // Any other word is a file extension, normalized to the '.ext' form
  return w.startsWith('.') ? w : `.${w}`;
});
tokens.push(new Condition(!!groups.exclude, 'has', words));
return;
}

// is:public,protected
if (groups.cond === 'is') {
const validWords = ['public', 'protected', 'private'];
const words = (groups.word as string).split(',').filter((w) => validWords.includes(w));
tokens.push(new Condition(!!groups.exclude, 'is', words));
return;
}

// (-)comments:2..12
for (const [re, condition] of counterConditions) {
if (!re.test(groups.cond)) {
Expand Down
5 changes: 4 additions & 1 deletion app/support/search/query-syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Some operators takes user name as an arguments. In such operators you can use a
* cliked-by:
* to:
* has:
* is:
* comments: *(interval)*
* likes: *(interval)*
* clikes: *(interval)*
Expand Down Expand Up @@ -117,7 +118,9 @@ The "in:" operator has the "group:" alias, it left for compatibility.

Since `cliked-by:` makes sense only for comments, it switches the search scope to comments. So the query `cat cliked-by:alice` is equal to `in-comments: cat cliked-by:alice`. Being used in post body scope (like `in-body: cliked-by:...`), `cliked-by:` is ignored.

**has:images,audio** limits search to posts with files of the specified type. You can specify the concrete file type (only `images` or `audio` for now), or search for any files using the `has:files` form.
**is:private,protected** limits search to posts with the specified visibility. The supported values are `private`, `protected` and `public`.

**has:images,audio** limits search to posts with files of the specified type. You can specify a concrete file type (only `images` or `audio` for now) or search for any files using the `has:files` form. You can also specify a file extension: for example, `has:mp3` (or `has:.mp3`) searches for posts with files that have the `mp3` extension.

**comments:*(interval)*** limits search to posts with the specified number of comments.

Expand Down
13 changes: 13 additions & 0 deletions migrations/20240831105941_attachments_ext_index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import type { Knex } from 'knex';

/**
 * Applies the migration: creates the `attachments_extension_idx` expression
 * index on the lower-cased last dot-separated part of `attachments.file_name`.
 *
 * @param knex - the Knex instance used to run the raw statement
 * @returns the result of `knex.schema.raw`
 */
export function up(knex: Knex) {
  const createIndexSQL = `do $$begin
create index attachments_extension_idx on attachments
-- Negative positions (from the end) in 'split_part' are supported only in PostgreSQL 14+
( lower(reverse(split_part(reverse(file_name), '.', 1))) );
end$$`;

  return knex.schema.raw(createIndexSQL);
}

/**
 * Reverts the migration: drops the `attachments_extension_idx` index.
 *
 * @param knex - the Knex instance used to run the raw statement
 * @returns the result of `knex.schema.raw`
 */
export function down(knex: Knex) {
  const dropIndexSQL = `do $$begin
drop index attachments_extension_idx;
end$$`;

  return knex.schema.raw(dropIndexSQL);
}
10 changes: 10 additions & 0 deletions test/integration/support/DbAdapter/search-files.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,16 @@ describe('Search by file types', () => {
expect(postIds, 'to equal', [posts[1].id, posts[0].id]);
});

it('should search posts with .jpg files', async () => {
const postIds = await dbAdapter.search('has:jpg');
expect(postIds, 'to equal', [posts[0].id]);
});

it('should search posts with images but without the .jpg files', async () => {
const postIds = await dbAdapter.search('has:images -has:.jpg');
expect(postIds, 'to equal', [posts[1].id]);
});

it('should search posts with audio', async () => {
const postIds = await dbAdapter.search('has:audio');
expect(postIds, 'to equal', [posts[1].id]);
Expand Down
74 changes: 74 additions & 0 deletions test/integration/support/DbAdapter/search-privacy.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/* eslint-env node, mocha */
/* global $pg_database */
import expect from 'unexpected';

import cleanDB from '../../../dbCleaner';
import { dbAdapter } from '../../../../app/models';
import { createUsers } from '../../helpers/users';
import { createPost } from '../../helpers/posts-and-comments';

describe('Search by post privacy', () => {
  // Posts in creation order: [0] by luna (updated with isPrivate), [1] by mars
  // (updated with isProtected), [2] by venus (account settings left untouched)
  const posts = [];
  let luna, mars, venus;

  // Converts indexes in 'posts' to post ids. It must be called inside the
  // tests, since 'posts' is filled only by the 'before' hook.
  const idsOf = (indexes) => indexes.map((i) => posts[i].id);

  before(async () => {
    await cleanDB($pg_database);

    [luna, mars, venus] = await createUsers(['luna', 'mars', 'venus']);
    await luna.update({ isPrivate: '1' });
    await mars.update({ isProtected: '1' });

    const post1 = await createPost(luna, 'Post1');
    const post2 = await createPost(mars, 'Post2');
    const post3 = await createPost(venus, 'Post3');
    posts.push(post1, post2, post3);
  });

  describe('Anonymous search', () => {
    const cases = [
      { title: 'should not return any private posts', query: 'is:private', expected: [] },
      { title: 'should not return any protected posts', query: 'is:protected', expected: [] },
      { title: 'should return only public posts', query: 'is:public', expected: [2] },
    ];

    for (const { title, query, expected } of cases) {
      it(title, async () => {
        const postIds = await dbAdapter.search(query);
        expect(postIds, 'to equal', idsOf(expected));
      });
    }
  });

  describe('Authenticated search', () => {
    // All searches are performed on behalf of luna. The expected ids are
    // listed in the order the search is expected to return them.
    const cases = [
      { title: 'should return only private posts', query: 'is:private', expected: [0] },
      { title: 'should return only protected posts', query: 'is:protected', expected: [1] },
      { title: 'should return only public posts', query: 'is:public', expected: [2] },
      { title: 'should return not public posts', query: '-is:public', expected: [1, 0] },
      { title: 'should return not protected posts', query: '-is:protected', expected: [2, 0] },
      { title: 'should return not private posts', query: '-is:private', expected: [2, 1] },
    ];

    for (const { title, query, expected } of cases) {
      it(title, async () => {
        const postIds = await dbAdapter.search(query, { viewerId: luna.id });
        expect(postIds, 'to equal', idsOf(expected));
      });
    }
  });
});

0 comments on commit 46dae18

Please sign in to comment.