Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update odin #532

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ RUN wget https://download.swift.org/swift-5.9-release/ubuntu2204/swift-5.9-RELEA
RUN tar -xvf /home/builduser/swift.tar.gz -C /home/builduser && rm /home/builduser/swift.tar.gz && export PATH=/home/builduser/swift-5.9-RELEASE-ubuntu22.04/usr/bin:$PATH

# install odin
RUN wget 'https://github.com/odin-lang/Odin/releases/download/dev-2023-12/odin-ubuntu-amd64-dev-2023-12.zip' -O /home/builduser/odin.zip
RUN wget 'https://github.com/odin-lang/Odin/releases/download/dev-2025-02/odin-linux-amd64-dev-2025-02.zip' -O /home/builduser/odin.zip

RUN unzip /home/builduser/odin.zip -d /home/builduser/odin

Expand Down
99 changes: 99 additions & 0 deletions odin/json.odin
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package main

import "core:strings"

// marshalling doesn't work with pointers
// write_json renders every entry of `topposts_list` as a JSON object and
// joins them into one JSON array, with entries separated by ",\n".
//
// Inputs:
// - topposts_list: entries to serialise; an empty slice yields "[\n\n]".
//
// Returns: the JSON text. The string is backed by the builder created here
// (strings.builder_make with its default allocator); this procedure does not
// free it.
write_json :: proc(topposts_list: []TopPosts) -> string {
	out := strings.builder_make()
	strings.write_string(&out, "[\n")
	for topposts, i in topposts_list {
		if i > 0 {
			strings.write_string(&out, ",\n")
		}
		entry := write_json_topposts(topposts)
		strings.write_string(&out, entry)
		// write_json_topposts returns a buffer from its own builder. Its bytes
		// have just been copied into `out`, so release it instead of leaking
		// one buffer per entry.
		delete(entry)
	}
	strings.write_string(&out, "\n]")
	return strings.to_string(out)
}

// write_json_post renders one Post as a compact JSON object of the form
// {"_id":"...","title":"...","tags":["...", ...]}.
//
// Inputs:
// - post: the post to serialise; its id, title and every tag are written as
//   JSON strings.
//
// Returns: the JSON text, backed by the builder created here. This procedure
// does not free it.
write_json_post :: proc(post: Post) -> string {
	// Writes `s` as a quoted JSON string. The quote, the backslash and control
	// bytes below 0x20 are escaped so the output stays valid JSON; every other
	// byte (including multi-byte UTF-8 sequences) is copied through unchanged.
	write_quoted :: proc(b: ^strings.Builder, s: string) {
		hex := "0123456789abcdef"
		strings.write_byte(b, '"')
		for i in 0 ..< len(s) {
			c := s[i]
			switch c {
			case '"':
				strings.write_string(b, "\\\"")
			case '\\':
				strings.write_string(b, "\\\\")
			case '\n':
				strings.write_string(b, "\\n")
			case '\r':
				strings.write_string(b, "\\r")
			case '\t':
				strings.write_string(b, "\\t")
			case:
				if c < 0x20 {
					// Remaining control characters use the \u00XX form.
					strings.write_string(b, "\\u00")
					strings.write_byte(b, hex[c >> 4])
					strings.write_byte(b, hex[c & 0x0f])
				} else {
					strings.write_byte(b, c)
				}
			}
		}
		strings.write_byte(b, '"')
	}

	out := strings.builder_make()

	// _id
	strings.write_string(&out, "{\"_id\":")
	write_quoted(&out, post.id)

	// title
	strings.write_string(&out, ",\"title\":")
	write_quoted(&out, post.title)

	// tags
	strings.write_string(&out, ",\"tags\":[")
	for tag, i in post.tags {
		if i > 0 {
			strings.write_string(&out, ",")
		}
		write_quoted(&out, tag)
	}

	strings.write_string(&out, "]}")
	return strings.to_string(out)
}

// write_json_topposts renders one TopPosts entry as a compact JSON object of
// the form {"_id":"...","tags":[...],"related":[{...}, ...]}.
//
// Inputs:
// - topposts: the entry to serialise. `id` and `tags` are pointers and are
//   dereferenced unconditionally, as is every element of `related`, so all of
//   them must be non-nil.
//
// Returns: the JSON text, backed by the builder created here. This procedure
// does not free it.
write_json_topposts :: proc(topposts: TopPosts) -> string {
	// Writes `s` as a quoted JSON string. The quote, the backslash and control
	// bytes below 0x20 are escaped so the output stays valid JSON; every other
	// byte (including multi-byte UTF-8 sequences) is copied through unchanged.
	write_quoted :: proc(b: ^strings.Builder, s: string) {
		hex := "0123456789abcdef"
		strings.write_byte(b, '"')
		for i in 0 ..< len(s) {
			c := s[i]
			switch c {
			case '"':
				strings.write_string(b, "\\\"")
			case '\\':
				strings.write_string(b, "\\\\")
			case '\n':
				strings.write_string(b, "\\n")
			case '\r':
				strings.write_string(b, "\\r")
			case '\t':
				strings.write_string(b, "\\t")
			case:
				if c < 0x20 {
					// Remaining control characters use the \u00XX form.
					strings.write_string(b, "\\u00")
					strings.write_byte(b, hex[c >> 4])
					strings.write_byte(b, hex[c & 0x0f])
				} else {
					strings.write_byte(b, c)
				}
			}
		}
		strings.write_byte(b, '"')
	}

	out := strings.builder_make()

	// _id
	strings.write_string(&out, "{\"_id\":")
	write_quoted(&out, topposts.id^)

	// tags: dereference the slice pointer once and use the slice everywhere
	strings.write_string(&out, ",\"tags\":[")
	tags := topposts.tags^
	for tag, i in tags {
		if i > 0 {
			strings.write_string(&out, ",")
		}
		write_quoted(&out, tag)
	}
	strings.write_string(&out, "],")

	// related
	strings.write_string(&out, "\"related\":[")
	for related_post, i in topposts.related {
		if i > 0 {
			strings.write_string(&out, ",")
		}
		post_json := write_json_post(related_post^)
		strings.write_string(&out, post_json)
		// write_json_post returns a buffer from its own builder. Its bytes have
		// been copied into `out`, so release it instead of leaking it.
		delete(post_json)
	}
	strings.write_string(&out, "]")

	// finish
	strings.write_string(&out, "}")
	return strings.to_string(out)
}
162 changes: 94 additions & 68 deletions odin/related.odin
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,84 @@ package main
import "core:encoding/json"
import "core:fmt"
import "core:os"
import "core:slice"
import "core:time"
// import "core:slice"

topN :: 5

Post :: struct {
id: string `json:"_id"`,
title: string,
tags: []string,
tags: []Tag,
}
Posts :: []Post

RelatedPosts :: struct {
id: string `json:"_id"`,
tags: []string,
PostIdx :: uint
PostIdxList :: [dynamic]PostIdx
Tag :: string
Tag2PostIdxList :: map[Tag]PostIdxList

// marshal/unmarshal doesn't work with pointers
related: [topN]Post,
TopPosts :: struct {
id: ^string `json:"_id"`,
tags: ^[]Tag,
related: []^Post,
}
// Score is one slot of the running "best so far" list that get_top maintains
// and top5 reads back.
Score :: struct {
// value taken from the `score` slice element that earned this slot
s: u8,
// index of that element (chunk base + offset); top5 uses it to index the post slice
pos: u32,
}

// is_top reports whether any element of `score` is strictly greater than `m`.
//
// Inputs:
// - m: threshold to beat.
// - score: values to scan; every element is visited (there is no early exit).
//
// Returns: 1 if at least one element exceeds `m`, otherwise 0 (also 0 for an
// empty slice).
is_top :: #force_inline proc(m: u8, score: []u8) -> u8 {
	found: u8 = 0
	for idx in 0 ..< len(score) {
		// OR in a 0/1 flag per element rather than returning on the first hit.
		found |= (1 if score[idx] > m else 0)
	}
	return found
}

// get_top merges the elements of `score` into `t5`, a list of the best scores
// seen so far.
//
// Inputs:
// - b: index of score[0] in the caller's full array; stored positions are
//   b + offset within `score`.
// - score: the chunk of values to examine.
// - min: in/out threshold. Only values strictly greater than min^ are
//   inserted; after each insertion it is set to the score in the last slot.
// - t5: the best-so-far list, updated in place. The insertion shifts entries
//   toward the end and drops the last one; it assumes the list is already in
//   descending order of `s`. Any length is accepted (the original hard-coded
//   a length of 5).
//
// On equal scores the entry already in the list keeps its place, because the
// comparison is strict.
get_top :: #force_inline proc(b: u32, score: []u8, min: ^u8, t5: []Score) {
	slots := len(t5)
	if slots == 0 {
		// Nowhere to store a result.
		return
	}
	for s, offset in score {
		if s <= min^ {
			continue
		}
		// Start at the second-to-last slot: the last entry is always the one
		// pushed out. Shift lower-scoring entries down by one until the
		// insertion point is found.
		u := slots - 2
		for u >= 0 && s > t5[u].s {
			t5[u + 1] = t5[u]
			u -= 1
		}
		t5[u + 1] = Score {
			s   = s,
			pos = b + u32(offset),
		}
		min^ = t5[slots - 1].s
	}
}

// top5 picks the topN highest entries of `score` and stores pointers to the
// corresponding posts in `related`.
//
// Inputs:
// - related: output slice; the first topN elements are overwritten, so it
//   must hold at least topN entries.
// - score: one score per post, scanned in chunks of 64 elements.
// - ps: the posts that `score` indexes into.
//
// Slots that no score above 0 ever claimed keep position 0, so they point at
// ps[0]. Nothing is written when `ps` is empty.
top5 :: #force_inline proc(related: []^Post, score: []u8, ps: []Post) {
	CHUNK :: 64

	if len(ps) == 0 {
		// &ps[0] below would be out of bounds.
		return
	}

	// The zero value of Score is {s = 0, pos = 0}, the same starting state
	// the list was previously filled with by hand.
	t5: [topN]Score
	min_tags: u8

	// Cast the length once so every comparison is uint against uint.
	total := cast(uint)len(score)
	b: uint
	for b < total {
		e := min(b + CHUNK, total)
		chunk := score[b:e]
		// Scan first; only run the insertion pass on chunks that contain a
		// value able to enter the list.
		if is_top(min_tags, chunk) > 0 {
			get_top(u32(b), chunk, &min_tags, t5[:])
		}
		b += CHUNK
	}
	for t, i in t5 {
		related[i] = &ps[t.pos]
	}
}

main :: proc() {
file, ok := os.read_entire_file_from_filename("../posts.json")
Expand All @@ -31,7 +90,6 @@ main :: proc() {
return
}


posts: []Post

err := json.unmarshal(file, &posts)
Expand All @@ -42,82 +100,50 @@ main :: proc() {

start := time.now()

tag_map := make(map[string][dynamic]int)

for post, i in posts {
tag2postidxs: Tag2PostIdxList
for post, post_idx in posts {
for tag in post.tags {
_, ok := tag_map[tag]

if !ok {
tag_map[tag] = make([dynamic]int)
// TODO hashmap doesn't have the API
// that would remove extra hash comparisons
if !(tag in tag2postidxs) {
tag2postidxs[tag] = {}
}

append_elem(&tag_map[tag], i)
post_idxs := &tag2postidxs[tag]
append(post_idxs, uint(post_idx))
}
}

op := make([]TopPosts, len(posts))
rl := make([]^Post, len(posts) * 5)

post_count := len(posts)
all_related_posts := make([]RelatedPosts, post_count)

tagged_post_count := make([]int, post_count)
tagged_post_count := make([]u8, len(posts))

for post, i in posts {
for post_idx := 0; post_idx < len(posts); post_idx += 1 {
// reset tagged_post_count
slice.zero(tagged_post_count)

for item, j in tagged_post_count {
tagged_post_count[j] = 0
}

for tag in post.tags {
for item in tag_map[tag] {
tagged_post_count[item] += 1
for tag in posts[post_idx].tags {
for tagged_post_idx in tag2postidxs[tag] {
tagged_post_count[tagged_post_idx] += 1
}
}

tagged_post_count[i] = 0 // don't count self

top5 := [topN * 2]int{}
min_tags := 0

for count, pIdx in tagged_post_count {
if count > min_tags {

upperBound := (topN - 2) * 2

for upperBound >= 0 && count > top5[upperBound] {
top5[upperBound+2] = top5[upperBound]
top5[upperBound+3] = top5[upperBound+1]
upperBound -= 2
}

insertPos := upperBound + 2
top5[insertPos] = count
top5[insertPos+1] = pIdx
tagged_post_count[post_idx] = 0 // Don't count self

min_tags = top5[topN*2-2]
related := rl[post_idx * 5:post_idx * 5 + 5]
top5(related, tagged_post_count, posts)
op[post_idx] = {
id = &posts[post_idx].id,
tags = &posts[post_idx].tags,
related = related,
}
}

top_posts := [topN]Post{}

for i := 1; i < 10; i += 2 {
top_posts[i/2] = posts[top5[i]]
}


all_related_posts[i] = RelatedPosts{post.id, post.tags, top_posts}
}

fmt.println("Processing time (w/o IO): ", time.since(start))

jsonStr, err3 := json.marshal(all_related_posts)

if err3 != nil {
fmt.println("Error marshaling json: ", err3)
return
}
out_str := transmute([]u8)write_json(op)

ok = os.write_entire_file("../related_posts_odin.json", jsonStr)
ok = os.write_entire_file("../related_posts_odin.json", out_str)
if !ok {
fmt.println("failed to write related_posts.json")
return
Expand Down
4 changes: 2 additions & 2 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,8 @@ run_julia_con() {

run_odin() {
echo "Running Odin" &&
cd ./odin &&
odin build related.odin -file -o:speed &&
odin build ./odin -o:aggressive -out:./odin/related &&
cd ./odin && \
run_command "Odin" $runs ./related &&
check_output "related_posts_odin.json"
}
Expand Down