-
Notifications
You must be signed in to change notification settings - Fork 100
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
265 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ crystal/.shards/ | |
crystal/crystal | ||
ols.json | ||
odin/related | ||
c3/related | ||
.dart_tool/ | ||
v/related | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
// Using std::collections::object to serialize json would require | ||
// converting into objects; instead, I just directly wrote the json. | ||
// It's ugly copy-paste one-time-use code. | ||
module related::json; | ||
|
||
import std::collections; | ||
import std::encoding::json; | ||
import std::io; | ||
|
||
// Parses `input`, a JSON array of post objects, into a slice of Post.
//
// Each post object must provide "_id", "title" and "tags"; a missing key
// (or a parse failure) is propagated to the caller as a fault.
// Nothing is freed, and the result assumes that input outlives output.
fn Post[]! parse_posts(String input, Allocator alloc = allocator::heap())
{
    Object* root = json::parse_string(input, alloc)!;
    List(<Post>) parsed;
    foreach (entry : root.array)
    {
        // Gather the tag strings of this post first.
        List(<String>) tag_list;
        Object* tags_node = entry.get("tags")!;
        foreach (tag_node : tags_node.array)
        {
            tag_list.push(tag_node.s);
        }

        Post post = {
            ._id = entry.get_string("_id")!,
            .title = entry.get_string("title")!,
            .tags = tag_list.array_view(),
        };
        parsed.push(post);
    }
    return parsed.array_view();
}
|
||
// Appends `s` to `out` as a double-quoted JSON string.
// Quotes, backslashes and control characters (< 0x20) are escaped so the
// result stays valid JSON whatever the string contains; all other bytes
// are copied through unchanged.
fn void append_json_string(DString* out, String s) @private
{
    String hex = "0123456789abcdef";
    out.append('"');
    foreach (c : s)
    {
        switch (c)
        {
            case '"':
                out.append("\\\"");
            case '\\':
                out.append("\\\\");
            case '\n':
                out.append("\\n");
            case '\r':
                out.append("\\r");
            case '\t':
                out.append("\\t");
            default:
                if (c < 0x20)
                {
                    // Remaining control characters use the \u00XX form.
                    out.append("\\u00");
                    out.append(hex[c >> 4]);
                    out.append(hex[c & 0x0F]);
                }
                else
                {
                    out.append(c);
                }
        }
    }
    out.append('"');
}

// Appends `items` to `out` as a JSON array of strings, e.g. ["a","b"].
// An empty slice is written as [].
fn void append_json_string_array(DString* out, String[] items) @private
{
    out.append('[');
    foreach (i, item : items)
    {
        if (i > 0) out.append(',');
        append_json_string(out, item);
    }
    out.append(']');
}

// Serializes a post as {"_id":...,"title":...,"tags":[...]} with no
// whitespace between tokens.
// The returned String is a view of a DString that is never freed.
fn String Post.to_json(post)
{
    DString out;
    out.append("{\"_id\":");
    append_json_string(&out, post._id);
    out.append(",\"title\":");
    append_json_string(&out, post.title);
    out.append(",\"tags\":");
    append_json_string_array(&out, post.tags);
    out.append('}');
    return out.str_view();
}

// Serializes a TopPosts entry as {"_id":...,"tags":[...],"related":[...]},
// where each element of "related" is the JSON form of the related Post.
// The returned String is a view of a DString that is never freed.
fn String TopPosts.to_json(topposts)
{
    DString out;
    out.append("{\"_id\":");
    append_json_string(&out, *topposts._id);
    out.append(",\"tags\":");
    append_json_string_array(&out, *topposts.tags);

    out.append(",\"related\":[");
    foreach (i, related_post : topposts.related)
    {
        if (i > 0) out.append(',');
        out.append(related_post.to_json());
    }
    out.append("]}");
    return out.str_view();
}
|
||
// Serializes a list of TopPosts as a JSON array with one entry per line:
// "[\n", the entries separated by ",\n", then "\n]".
// The returned String is a view of a DString that is never freed.
fn String TopPosts[].to_json(topposts_list)
{
    DString out;
    out.append("[\n");
    foreach (i, entry : topposts_list)
    {
        // Separator goes before every entry except the first.
        if (i > 0) out.append(",\n");
        out.append(entry.to_json());
    }
    out.append("\n]");
    return out.str_view();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
module related; | ||
|
||
import std::collections; | ||
import std::core::mem; | ||
import std::io; | ||
import std::time::clock; | ||
|
||
// A tag is a plain string.
def Tag = String;

// Index of a post within the posts slice.
def PostIdx = uint;
def PostIdxList = List(<PostIdx>);

// Maps a tag to the indices of the posts carrying that tag.
def Tag2PostIdxList = HashMap(<Tag, PostIdxList>);

// A post as read from the input JSON.
struct Post
{
    String _id;
    String title;
    Tag[] tags;
}
def Posts = Post[];
|
||
// One output record: a post's id and tags, held by pointer rather than
// copied, plus pointers to its related posts.
struct TopPosts
{
    String* _id;
    Tag[]* tags;
    Post*[] related;
}
// A candidate in the top-5 ranking.
struct Score
{
    char s;   // score value
    uint pos; // index of the scored post
}
|
||
// Returns 1 if any element of `score` is strictly greater than `m`,
// otherwise 0. Every element is visited; there is no early exit.
fn char is_top(char m, char[] score) @inline
{
    char any_above = 0;
    foreach (value : score)
    {
        // An explicit bool -> char cast gives 1 for true and 0 for false.
        any_above |= (char)(value > m);
    }
    return any_above;
}
|
||
// Merges the scores of one chunk into the running top-5 table.
//
// b     - index of the chunk's first element, so that stored positions
//         refer to the whole score array rather than the chunk
// score - the chunk of scores to scan
// min   - in/out: current score of the last (5th) table entry; only scores
//         strictly greater than it are inserted
// t5    - the table, kept in descending score order; must hold 5 entries
fn void get_top(uint b, char[] score, char* min, Score[] t5) @inline
{
    foreach (offset, count : score)
    {
        if (count <= *min) continue;

        // Shift lower-scoring entries down one slot, dropping the last.
        ichar slot = 3;
        for (; slot >= 0 && count > t5[slot].s; slot -= 1)
        {
            t5[slot + 1] = t5[slot];
        }
        t5[slot + 1] = Score { .s = count, .pos = b + (uint)offset };
        *min = t5[4].s;
    }
}
|
||
// Fills `related[0..4]` with pointers to the five posts in `ps` that have
// the highest values in `score`, best first.
//
// The score array is scanned in chunks of 64 entries; a chunk is only
// handed to get_top when is_top reports that it contains a score above
// the current threshold. Table slots that are never filled keep position
// 0 and therefore point at ps[0].
fn void top5(Post*[] related, char[] score, Post[] ps) @inline
{
    Score[5] best;      // zero-initialised: score 0, position 0
    char threshold = 0;
    uint chunk_len = 64;

    for (uint start = 0; start < score.len; start += chunk_len)
    {
        uint stop = min(start + chunk_len, (uint)score.len);
        char[] window = score[start..stop - 1];
        if (is_top(threshold, window) == 0) continue;
        get_top(start, window, &threshold, best[0..]);
    }

    foreach (slot, entry : best)
    {
        related[slot] = &ps[entry.pos];
    }
}
|
||
// Entry point: loads ../posts.json, computes up to five related posts per
// post (ranked by number of shared tags), prints the processing time and
// writes the result to ../related_posts_c3.json.
// Any I/O or parse failure aborts the program via `!!`.
fn void main()
{
    String input = (String)file::load_new("../posts.json")!!;
    Post[] posts = json::parse_posts(input)!!;

    Clock start = clock::now();

    // Build the tag -> post indices lookup.
    Tag2PostIdxList tag2postidxs;
    foreach (post_idx, post : posts)
    {
        foreach (tag : post.tags)
        {
            // TODO hashmap doesn't have the API
            // that would remove extra hash comparisons
            if (!tag2postidxs.has_key(tag))
            {
                tag2postidxs.set(tag, {});
            }
            tag2postidxs.get_ref(tag)!!.push((uint)post_idx);
        }
    }

    TopPosts[] op = mem::new_array(TopPosts, posts.len);
    // One backing array for all "related" slices: 5 slots per post.
    Post*[] rl = mem::new_array(Post*, posts.len * 5);

    // Scratch buffer, reused for every post: number of tags each other
    // post shares with the current one.
    char[] tagged_post_count = mem::new_array(char, posts.len);

    for (int post_idx = 0; post_idx < posts.len; post_idx += 1)
    {
        // reset tagged_post_count
        mem::zero_volatile(tagged_post_count);

        foreach (tag : posts[post_idx].tags)
        {
            foreach (tagged_post_idx : tag2postidxs.get(tag)!!)
            {
                tagged_post_count[tagged_post_idx] += 1;
            }
        }

        tagged_post_count[post_idx] = 0; // Don't count self

        Post*[] related = rl[post_idx * 5 : 5];
        top5(related, tagged_post_count, posts);
        op[post_idx] = { ._id = &posts[post_idx]._id, .tags = &posts[post_idx].tags, .related = related };
    }
    NanoDuration end = start.to_now();
    io::printf("Processing time (w/o IO): %sms\n", end.to_ms());

    File op_file = file::open("../related_posts_c3.json", "wb")!!;
    // Close the output file explicitly instead of leaving the handle open
    // until process exit.
    defer (void)op_file.close();
    io::fprint(&op_file, op.to_json())!!;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters