Skip to content

Commit

Permalink
🐛 Streaming JSON parsing fix
Browse files Browse the repository at this point in the history
OpenAI sucks and decided to break up their JSON across stream chunks, but they were nice enough to use an easy delimiter that we can split on. I also added a buffer so that JSON split across chunks is carried over and processed together with the next chunk.
  • Loading branch information
darkdarcool committed Feb 12, 2024
1 parent 2f60f7c commit 7b4bf69
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 57 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
/target
debug.txt
resp.txt
.vscode/
.DS_Store
11 changes: 10 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
lazy_static = "1.4.0"
# lazy_static = "1.4.0"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.11", features = ["json", "cookies", "stream"] }
serde_json = { version = "1.0.113" }
Expand All @@ -16,4 +16,5 @@ uuid = { version = "1.7.0", features = ["v4"] }
chrono = "0.4.33"
rand = "0.8.5"
futures = "0.3.30"
homedir = "0.2.1"
homedir = "0.2.1"
repair_json = "0.1.0"
112 changes: 59 additions & 53 deletions src/copilot.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1,22 @@
use std::io::Write;

use crate::{gh, headers::{CopilotCompletionHeaders, Headers}, prompts};
use crate::{gh, headers::{CopilotCompletionHeaders, Headers}, prompts, utils};
use futures::StreamExt;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use serde_json::json;

#[derive(Serialize, Deserialize, Debug)]
struct ContentFilterOffsets {
check_offset: u64,
start_offset: u64,
end_offset: u64,
}

#[derive(Serialize, Deserialize, Debug)]
struct ContentFilterResults {
hate: FilterResult,
self_harm: FilterResult,
sexual: FilterResult,
violence: FilterResult,
struct ContentFilterResult {
filtered: bool,
severity: String,
}

#[derive(Serialize, Deserialize, Debug)]
struct FilterResult {
filtered: bool,
severity: String,
struct ContentFilterOffsets {
check_offset: i32,
start_offset: i32,
end_offset: i32,
}

#[derive(Serialize, Deserialize, Debug)]
Expand All @@ -35,18 +27,25 @@ struct Delta {

#[derive(Serialize, Deserialize, Debug)]
struct Choice {
index: u64,
index: i32,
content_filter_offsets: ContentFilterOffsets,
content_filter_results: ContentFilterResults,
content_filter_results: Option<ContentFilterResults>,
delta: Delta,
#[serde(rename = "finish_reason")]
finish_reason: Option<String>,
}

/// Per-category content-filter verdicts carried by a streamed `Choice`
/// (in its optional `content_filter_results` field).
///
/// Each category holds a `ContentFilterResult` with a `filtered` flag and a
/// `severity` string.
#[derive(Serialize, Deserialize, Debug)]
struct ContentFilterResults {
// Verdict for the "hate" category.
hate: ContentFilterResult,
// Verdict for the "self_harm" category.
self_harm: ContentFilterResult,
// Verdict for the "sexual" category.
sexual: ContentFilterResult,
// Verdict for the "violence" category.
violence: ContentFilterResult,
}

#[derive(Serialize, Deserialize, Debug)]
struct GhCopilotResponse {
choices: Vec<Choice>,
created: u64,
created: i64,
id: String,
}

Expand All @@ -56,6 +55,12 @@ struct Message {
role: String,
}

/// Value returned by `CopilotManager::ask`: the assembled reply text plus the
/// reason the stream reported for finishing.
#[derive(Debug)]
pub struct Completion {
/// Reply text, built by concatenating every `delta.content` piece received
/// from the stream.
pub content: String,
/// `finish_reason` taken from the first streamed choice that carried one;
/// starts out as an empty string and stays empty if none was seen.
pub finish_reason: String
}

pub struct CopilotManager<'a> {
vscode_sid: String,
device_id: String,
Expand Down Expand Up @@ -95,7 +100,7 @@ impl<'a> CopilotManager<'a> {
.collect()
}

pub async fn ask(&mut self, prompt: &String, log: bool) -> String {
pub async fn ask(&mut self, prompt: &String, log: bool) -> Completion {
let url = "https://api.githubcopilot.com/chat/completions";
let headers = CopilotCompletionHeaders {
token: &self.auth.copilot_auth.token,
Expand Down Expand Up @@ -136,55 +141,53 @@ impl<'a> CopilotManager<'a> {

let mut message = String::new();
let mut buffer = String::new();
let mut finish_reason = String::new();

'outerloop: while let Some(chunk) = response.next().await {
let body = chunk.unwrap();
let body_str = String::from_utf8_lossy(&body)
.into_owned()
.replace("\n", "");
let body_str = String::from_utf8_lossy(&body);

let lines: Vec<String> = body_str
.split("data:")
.map(|s| s.trim())
.map(|s| s.to_string())
.collect();
buffer.push_str(&body_str);
// the data may be split into multiple chunks, BUT it's always delimited by \n\ndata:

let lines = buffer.split("\n\ndata: ").map(|s| s.to_string()).map(|s| s.replacen("data:", "", 1)).collect::<Vec<String>>();


let mut processed_buffer = String::new();
for line in lines {
if line == "" {
utils::append_to_file("resp.txt", &format!("{}\n", line));
if line.is_empty() {
continue;
}

buffer.push_str(&line.trim());
let parsed = serde_json::from_str::<GhCopilotResponse>(&line);

let json = serde_json::from_str::<GhCopilotResponse>(&buffer);

match json {
Ok(json) => {
if json.choices.len() > 0 {
if let Some(content) = &json.choices[0].delta.content {
if log {
print!("{}", content);
std::io::stdout().flush().unwrap();
}
match parsed {
Ok(parsed) => {
if parsed.choices.len() > 0 {
let choice = &parsed.choices[0];
if let Some(freason) = &choice.finish_reason {
finish_reason = freason.clone().to_string();
break 'outerloop;
}
let delta = &choice.delta;
if let Some(content) = &delta.content {
print!("{}", content);
std::io::stdout().flush().unwrap();
message.push_str(content);
} else if let Some(_finish) = &json.choices[0].finish_reason {
// println!("Finish reason: {}", finish);
} else {
// utils::append_to_file("debugr.txt", &format!("{:#?}\n", json));
}
}

buffer.clear();
}
Err(_e) => {
if line == "[DONE]" {
break 'outerloop;
}
// utils::append_to_file("debug.txt", &format!("{}\n", e));
continue;
Err(_) => {
utils::append_to_file("debug.txt", &format!("{}\n", line));
processed_buffer.push_str(&line);
}
}

buffer = processed_buffer.clone();
}

}

if log {
Expand All @@ -200,6 +203,9 @@ impl<'a> CopilotManager<'a> {

self.history = history;

message
Completion {
content: message.clone(),
finish_reason
}
}
}
6 changes: 5 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ async fn main() {
break;
}

copilot_m.ask(&input, true).await;
let msg = copilot_m.ask(&input, true).await;

println!("===COPILOT===");
println!("{:#?}", msg);

}
}

0 comments on commit 7b4bf69

Please sign in to comment.