From 8ed22c27321c7e78555eb7768e0d9bbb1cb35722 Mon Sep 17 00:00:00 2001 From: Jonathan Woollett-Light Date: Fri, 14 Jun 2024 19:14:25 +0100 Subject: [PATCH] bruh --- core.abc | 2 - index.html | 36 +++---- src/ast.rs | 8 +- src/main.rs | 195 ++++++++++++++++++------------------ src/optimization.rs | 39 ++++---- src/parsing.rs | 106 +++++++++----------- src/temporary_intrinsics.rs | 3 + test.txt | 9 ++ 8 files changed, 197 insertions(+), 201 deletions(-) create mode 100644 src/temporary_intrinsics.rs diff --git a/core.abc b/core.abc index 390bb44..7caa6f1 100644 --- a/core.abc +++ b/core.abc @@ -16,8 +16,6 @@ macro type_integer_i64 7 macro value_literal 0 macro value_variable 1 -macro value_type 2 -macro value_register 3 # making `if` and `loop` functions requires adding the ability for functions to # manipualte the AST under them, so for now they are intrinsics until this diff --git a/index.html b/index.html index 6e0387b..1467ed8 100644 --- a/index.html +++ b/index.html @@ -34,12 +34,12 @@

🚧Given the early state of development anything and everything might not work🚧

Supports aarch64.
Doesn't support RISC-V.
Won't support x86-64.

Hello, World!

@@ -59,27 +59,27 @@
Rust

Keywords

    -
  1. if: If x0 contains 1 runs the indented code.
  2. -
  3. loop: Rust's loop without the brackets.
  4. -
  5. break: Jumps out of the current loop.
  6. +
  7. if: Rust's if without the brackets*.
  8. +
  9. loop: Rust's loop without the brackets*.
  10. +
  11. break: Jumps out of the current loop*.
  12. def: Python's def without the :.
  13. -
  14. in: The arguments given to a function.
  15. +
  16. in: The arguments given to a function**.
  17. +
  18. out: The output given to a function. a := b - c + d after unrolling and type inference is equivalent to

    c + d @ x
    b - x @ y
    a := y @ null

    @ can only be defined on the outermost expression in a line; the outputs of nested expressions are used to chain the expressions together.
  19. fail: If reachable triggers a failure in formal verification.
  20. -
  21. assume: Mark code which will be passed to formal verification but excluded at run-time e.g. x %= 4 tells the verifier { 0 ≤ x < 4 }.
  22. -
  23. typeof: Get the type of a value.
  24. -
  25. valueof: Get the value of a value.
  26. +
  27. assume: Mark code which will be passed to formal verification but excluded at run-time. assume x %= 4 tells the verifier { 0 ≤ x < 4 }.
  28. +
  29. valueof: Gets the value of a value.

    a := valueof 1 // a = value_literal (0)
    b := valueof c // b = value_variable (1)

  30. unreachable: Marks the end of execution, all following statements are unreachable.
  31. +
  32. macro: Similar to macros in C. Macro evaluation happens before the AST is parsed and simply performs a find and replace on the text contents of source files. Since := is a function, macro is how constants used in the definition of := are defined. Unless writing your own core, you should never need to use this.
  33. asm: The prefix for a line with in-line assembly e.g. asm mov x0, 1.
+

* if, loop and break will be removed as intrinsics and added to the core library as functions when the compiler is re-written in the language. At that point functions will be able to mutate their child AST. This will enable support equivalent to, but better than, the procedural macros found in other languages.

+

** I'm thinking about splitting in into lhs and rhs, as this will make all input patterns distinguishable to a function. Currently, with in, when you have a function my_function, calling it as a my_function b or as my_function a b appears identical to the function.

Tentative keywords

    -
  1. comptime: Forces evaluation at compile-time.
  2. -
  3. assert: A condition that calls exit if false at run-time.
  4. +
  5. assert: A condition that calls exit if false at run-time. This will tie into some mechanism to auto-generate unique error codes.
  6. verify: Like require but does not exhaustively check the condition. verify x < 2 will apply a number of steps of gradient descent to search for the minimum of x to verify the condition. While this is not formal verification, it provides an effective and almost always correct mechanism to test conditions at compile time that would be too computationally expensive to exhaustively prove.
  7. -
  8. abstain: Resets the understood domain of a variable at compile to any value.
  9. clone: Marks the split of execution into multiple threads.
  10. -
  11. ast: Gets the AST of code indented under a function call.
  12. -
  13. inline: Inline an AST as if it where source code at this location.
  14. +
  15. ast: Gets the AST of code indented under a function call. This can then be edited so a function may perform the role of a procedural macro.
diff --git a/src/ast.rs b/src/ast.rs index 4871d75..28ef734 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -301,8 +301,6 @@ pub struct Variable { pub identifier: Identifier, /// The index operation e.g. `[1]` in `x[1]`. pub index: Option>, - /// The cast e.g. `:i32` in `x:i32`. - pub cast: Option, } impl From for Variable { @@ -311,7 +309,6 @@ impl From for Variable { addressing: Addressing::Direct, identifier, index: None, - cast: None, } } } @@ -322,7 +319,6 @@ impl From<&[char]> for Variable { addressing: Addressing::Direct, identifier: Identifier::from(chars), index: None, - cast: None, } } } @@ -488,7 +484,7 @@ pub enum Type { Array(Box), Reference(Box), TypeType, - Boolean + Boolean, } impl Default for Type { @@ -542,7 +538,7 @@ impl std::fmt::Display for Type { Array(array) => write!(f, "{array}"), Reference(reference) => write!(f, "&{reference}"), TypeType => write!(f, "type"), - Boolean => write!(f,"bool") + Boolean => write!(f, "bool"), } } } diff --git a/src/main.rs b/src/main.rs index 134ea0b..e526fe5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,13 +23,14 @@ mod macros; mod parsing; -mod backend; -use backend::*; +// mod backend; -mod exploration; +// mod exploration; mod inlining; +mod temporary_intrinsics; + mod optimization; mod including; @@ -157,6 +158,10 @@ fn build(path: Option) { // Inlines functions let inlined = unsafe { inlining::inline_functions(nodes) }; + + // Convert `loop`s and `if`s to inline assembly. + let inlined = unsafe { temporary_intrinsics::inline(nodes) }; + let inlined_string = display_ast(inlined); write_file( &project_path, @@ -165,8 +170,6 @@ fn build(path: Option) { &mut lock_file, ); - // Convert `loop`s and `if`s to inline assembly. - // Pre-exploration optimizations unsafe { optimization::prex_optimization(inlined) } let prex_optimization_string = display_ast(inlined); @@ -177,27 +180,27 @@ fn build(path: Option) { &mut lock_file, ); - // Explore variable space. 
- let mut explorer = unsafe { exploration::Explorer::new(inlined) }; - let mut i = 0; - let explored = loop { - explorer = match unsafe { explorer.next() } { - exploration::ExplorationResult::Continue(e) => e, - exploration::ExplorationResult::Done(e) => break e, - }; - - i += 1; - assert!(i < 50); - }; - let explored_string = display_ast(explored); - write_file( - &project_path, - PathBuf::from("explored").with_extension(LANGUAGE_EXTENSION), - explored_string.as_bytes(), - &mut lock_file, - ); + // // Explore variable space. + // let mut explorer = unsafe { exploration::Explorer::new(inlined) }; + // let mut i = 0; + // let explored = loop { + // explorer = match unsafe { explorer.next() } { + // exploration::ExplorationResult::Continue(e) => e, + // exploration::ExplorationResult::Done(e) => break e, + // }; + + // i += 1; + // assert!(i < 50); + // }; + // let explored_string = display_ast(explored); + // write_file( + // &project_path, + // PathBuf::from("explored").with_extension(LANGUAGE_EXTENSION), + // explored_string.as_bytes(), + // &mut lock_file, + // ); - // Optimize source + // // Optimize source // let optimized = unsafe { optimize(explored) }; // let optimized_string = display_ast(optimized); // write_file( @@ -207,78 +210,78 @@ fn build(path: Option) { // &mut lock_file, // ); - // Construct assembly - let assembly = assembly_from_node(explored); - let assembly_path = PathBuf::from("assembly").with_extension("s"); - write_file( - &project_path, - assembly_path.clone(), - assembly.as_bytes(), - &mut lock_file, - ); + // // Construct assembly + // let assembly = assembly_from_node(explored); + // let assembly_path = PathBuf::from("assembly").with_extension("s"); + // write_file( + // &project_path, + // assembly_path.clone(), + // assembly.as_bytes(), + // &mut lock_file, + // ); - // Make object file - let object_path = project_path.join("build").join("object").with_extension("o"); - let object_output = std::process::Command::new("as") - .args([ - 
"-o", - &object_path.display().to_string(), - &project_path.join("build").join(assembly_path).display().to_string(), - ]) - .output() - .unwrap(); - assert_eq!( - object_output.stdout, - [], - "{}", - std::str::from_utf8(&object_output.stdout).unwrap() - ); - assert_eq!( - object_output.stderr, - [], - "{}", - std::str::from_utf8(&object_output.stderr).unwrap() - ); - assert_eq!(object_output.status.code(), Some(0)); - // Write object hash - let mut object_buffer = Vec::new(); - let mut object_file = OpenOptions::new().read(true).open(&object_path).unwrap(); - object_file.read_to_end(&mut object_buffer).unwrap(); - let object_hash = HEXUPPER.encode(sha256_digest(object_buffer.as_slice()).unwrap().as_ref()); - let object_file_name = object_path.file_stem().unwrap(); - writeln!(&mut lock_file, "{},{object_hash}", object_file_name.to_str().unwrap()).unwrap(); - - // Make binary file - let binary_path = project_path.join("build").join("binary"); - let binary_output = std::process::Command::new("ld") - .args([ - "-s", - "-o", - &binary_path.display().to_string(), - &object_path.display().to_string(), - ]) - .output() - .unwrap(); - assert_eq!( - binary_output.stdout, - [], - "{}", - std::str::from_utf8(&binary_output.stdout).unwrap() - ); - assert_eq!( - binary_output.stderr, - [], - "{}", - std::str::from_utf8(&binary_output.stderr).unwrap() - ); - assert_eq!(binary_output.status.code(), Some(0)); - // Write binary hash - let mut binary_buffer = Vec::new(); - let mut binary_file = OpenOptions::new().read(true).open(&binary_path).unwrap(); - binary_file.read_to_end(&mut binary_buffer).unwrap(); - let binary_hash = HEXUPPER.encode(sha256_digest(binary_buffer.as_slice()).unwrap().as_ref()); - let binary_file_name = binary_path.file_stem().unwrap(); - writeln!(&mut lock_file, "{},{binary_hash}", binary_file_name.to_str().unwrap()).unwrap(); + // // Make object file + // let object_path = project_path.join("build").join("object").with_extension("o"); + // let object_output 
= std::process::Command::new("as") + // .args([ + // "-o", + // &object_path.display().to_string(), + // &project_path.join("build").join(assembly_path).display().to_string(), + // ]) + // .output() + // .unwrap(); + // assert_eq!( + // object_output.stdout, + // [], + // "{}", + // std::str::from_utf8(&object_output.stdout).unwrap() + // ); + // assert_eq!( + // object_output.stderr, + // [], + // "{}", + // std::str::from_utf8(&object_output.stderr).unwrap() + // ); + // assert_eq!(object_output.status.code(), Some(0)); + // // Write object hash + // let mut object_buffer = Vec::new(); + // let mut object_file = OpenOptions::new().read(true).open(&object_path).unwrap(); + // object_file.read_to_end(&mut object_buffer).unwrap(); + // let object_hash = HEXUPPER.encode(sha256_digest(object_buffer.as_slice()).unwrap().as_ref()); + // let object_file_name = object_path.file_stem().unwrap(); + // writeln!(&mut lock_file, "{},{object_hash}", object_file_name.to_str().unwrap()).unwrap(); + + // // Make binary file + // let binary_path = project_path.join("build").join("binary"); + // let binary_output = std::process::Command::new("ld") + // .args([ + // "-s", + // "-o", + // &binary_path.display().to_string(), + // &object_path.display().to_string(), + // ]) + // .output() + // .unwrap(); + // assert_eq!( + // binary_output.stdout, + // [], + // "{}", + // std::str::from_utf8(&binary_output.stdout).unwrap() + // ); + // assert_eq!( + // binary_output.stderr, + // [], + // "{}", + // std::str::from_utf8(&binary_output.stderr).unwrap() + // ); + // assert_eq!(binary_output.status.code(), Some(0)); + // // Write binary hash + // let mut binary_buffer = Vec::new(); + // let mut binary_file = OpenOptions::new().read(true).open(&binary_path).unwrap(); + // binary_file.read_to_end(&mut binary_buffer).unwrap(); + // let binary_hash = HEXUPPER.encode(sha256_digest(binary_buffer.as_slice()).unwrap().as_ref()); + // let binary_file_name = binary_path.file_stem().unwrap(); + // 
writeln!(&mut lock_file, "{},{binary_hash}", binary_file_name.to_str().unwrap()).unwrap(); } fn run(path: Option) { diff --git a/src/optimization.rs b/src/optimization.rs index c9673a0..8fa99b1 100644 --- a/src/optimization.rs +++ b/src/optimization.rs @@ -5,25 +5,26 @@ use std::ptr::NonNull; // Pre-exploration optimization. pub unsafe fn prex_optimization(root: NonNull) { - let mut stack = vec![root]; - while let Some(mut current) = stack.pop() { - match current.as_ref().statement.op { - Op::Unreachable => { - assert_eq!(current.as_ref().child, None); - if let Some(next) = current.as_ref().next { - crate::exploration::dealloc_ast(next); - } - } - _ => { - if let Some(child) = current.as_ref().child { - stack.push(child); - } - if let Some(next) = current.as_ref().next { - stack.push(next); - } - } - } - } + // let mut stack = vec![root]; + // while let Some(mut current) = stack.pop() { + // match current.as_ref().statement.op { + // Op::Unreachable => { + // assert_eq!(current.as_ref().child, None); + // if let Some(next) = current.as_ref().next { + // crate::exploration::dealloc_ast(next); + // } + // } + // _ => { + // if let Some(child) = current.as_ref().child { + // stack.push(child); + // } + // if let Some(next) = current.as_ref().next { + // stack.push(next); + // } + // } + // } + // } + todo!() } /// Post-exploration optimization. diff --git a/src/parsing.rs b/src/parsing.rs index 13dbe4c..8c91c74 100644 --- a/src/parsing.rs +++ b/src/parsing.rs @@ -15,10 +15,23 @@ enum ParseAstError { Line(ParseLineError), } +/// Returns the set of intrinsic functions. +fn default_definitions() -> HashSet { + // `def` is specially handled so doesn't need to be here. 
+ [ + "assume", + "fail", + "loop", + "break", + "if", + "valueof", + "unreachable" + ].into_iter().map(Identifier::from).collect::>() +} + pub unsafe fn parse_ast(chars: &[char]) -> Result>, ParseAstError> { - let mut definitions = HashSet::new(); + let mut definitions = default_definitions(); let mut first = OnceCell::new(); - let mut last = None; let mut i = 0; // The stack of parent nodes e.g. // ``` @@ -27,7 +40,7 @@ pub unsafe fn parse_ast(chars: &[char]) -> Result>, Pars // c // ``` // forms `[a,b,c]`. - let mut parent_stack = Vec::new(); + let mut parent_stack = Vec::>::new(); #[cfg(debug_assertions)] let mut limit = 0..LOOP_LIMIT; @@ -79,7 +92,7 @@ pub unsafe fn parse_ast(chars: &[char]) -> Result>, Pars i += line_length; let line = parse_line(line_characters, &mut definitions).map_err(ParseAstError::Line)?; - let node = NonNull::new(Box::into_raw(Box::new(AstNode { + let mut node = NonNull::new(Box::into_raw(Box::new(AstNode { line, preceding: None, child: None, @@ -89,7 +102,7 @@ pub unsafe fn parse_ast(chars: &[char]) -> Result>, Pars // Insert into the AST. let tabs = div_rem(spaces, 4).ok_or(ParseAstError::IndentationSpacing)?; - let after = split_off_checked(&mut parent_stack, tabs).ok_or(ParseAstError::IndetationDepth)?; + let mut after = split_off_checked(&mut parent_stack, tabs).ok_or(ParseAstError::IndetationDepth)?; if let Some(previous) = after.first_mut() { assert_eq!(previous.as_mut().next, None); previous.as_mut().next = Some(node); @@ -116,7 +129,7 @@ enum ParseLineError { pub fn parse_line(chars: &[char], definitions: &mut HashSet) -> Result { let line = if let Some((lhs, rhs)) = chars.split_at_checked(3) - && lhs == ['a','s','m'] + && lhs == ['a', 's', 'm'] { Line::Assembly(crate::parse_instruction(rhs).map_err(ParseLineError::Instruction)?) 
} else { @@ -127,35 +140,39 @@ pub fn parse_line(chars: &[char], definitions: &mut HashSet) -> Resu #[derive(Debug, Error)] enum ParseExpressionError { + #[error("Failed to parse definition expression.")] + Def, #[error("Empty statement")] Empty, - #[error("Badly formed definition")] - Def, #[error("Failed to parse variable: {0}")] Variable(ParseVariableError), #[error("Failed to parse value: {0}")] - Value(ParseValueError) + Value(ParseValueError), } + pub fn parse_expression( chars: &[char], definitions: &mut HashSet, ) -> Result { - let mut values = chars.split(|c|*c==' ').collect::>(); + let mut values = chars.split(|c| *c == ' ').collect::>(); - if let Some(['d','e','f']) = values.first() { + // Since `def`s define operands we need to handle this case specifically so the operands get added to the definitions set for future statements. + if let Some(['d', 'e', 'f']) = values.first() { let [_, key] = values.as_slice() else { return Err(ParseExpressionError::Def); }; definitions.insert(Identifier::from(*key)); return Ok(Expression { - op: Identifier::from(['d','e','f'].as_slice()), + op: Identifier::from(['d', 'e', 'f'].as_slice()), lhs: Arg::new(), rhs: Nested::Values(vec![Value::Variable(Variable::from(*key))]), out: None, }); } + // Each expression can define 1 output for the outermost expression. + // The outputs for nested expressions are auto generated. 
let out = match values.as_slice() { [.., a, b] if *a == ['@'] => { let var = parse_variable(b).map_err(ParseExpressionError::Variable)?; @@ -191,16 +208,15 @@ pub fn parse_expression( section.push(parse_value(value).map_err(ParseExpressionError::Value)?); } } - - todo!() + let mut partial = partial_opt.ok_or(ParseExpressionError::Empty)?; + partial.lhs = section; + Ok(partial) } - - #[derive(Debug, Error)] enum ParseValueError { #[error("Generic failure")] - Generic + Generic, } pub fn parse_value(chars: &[char]) -> Result { @@ -230,7 +246,6 @@ pub fn parse_variable(chars: &[char]) -> Result { }; let mut index = None; - let mut cast = None; #[cfg(debug_assertions)] let mut limit = 0..LOOP_LIMIT; @@ -247,20 +262,13 @@ pub fn parse_variable(chars: &[char]) -> Result { .skip(i) .find_map(|(j, c)| (*c == ']').then_some(j)) .ok_or(ParseVariableError::NonTerminatedIndex)?; - index = Some(parse_offset(&chars[i..n]).map_err(ParseVariableError::Offset)?); - cast = match chars.get(n + 1) { - Some(':') => Some(parse_type(&chars[n + 2..])?), - None => None, - _ => return Err(()), - }; - break; - } - ':' => { - cast = Some(parse_type(&chars[i..]).map_err(ParseVariableError::Type)?); + index = Some(Box::new( + parse_offset(&chars[i..n]).map_err(ParseVariableError::Offset)?, + )); break; } _ => { - identifier.push(c); + identifier.push(*c); } } } @@ -269,24 +277,24 @@ pub fn parse_variable(chars: &[char]) -> Result { addressing, identifier, index, - cast, }) } - #[derive(Debug, Error)] enum ParseOffsetError { + #[error("Empty set cannot be parsed.")] + Empty, #[error("Failed to parse integer.")] Integer, #[error("Failed tp parse variable: {0}")] - Variable(Box) + Variable(Box), } pub fn parse_offset(chars: &[char]) -> Result { #[cfg(debug_assertions)] let mut limit = 0..LOOP_LIMIT; const BASE: u32 = 10; - let first = chars.get(0).ok_or(())?; + let first = chars.get(0).ok_or(ParseOffsetError::Empty)?; if let Some(mut digit) = first.to_digit(BASE) { let mut iter = 
chars[1..].iter(); loop { @@ -300,7 +308,9 @@ pub fn parse_offset(chars: &[char]) -> Result { Ok(Offset::Integer(digit as u64)) } else { // TODO This uses recursion, fix this so it doesn't use recursion. - Ok(Offset::Variable(parse_variable(chars).map_err(|err|ParseOffsetError::Variable(Box::new(err)))?)) + Ok(Offset::Variable( + parse_variable(chars).map_err(|err| ParseOffsetError::Variable(Box::new(err)))?, + )) } } @@ -312,34 +322,10 @@ fn div_rem(rhs: usize, lhs: usize) -> Option { } /// [`slice::split_at`] has [`slice::split_at_checked`] this is the equivalent /// for [`Vec::split_off`]. -fn split_off_checked(vec: &mut Vec, index: usize) -> Option> { +fn split_off_checked(vec: &mut Vec, index: usize) -> Option> { if index > vec.len() { None } else { - Some(vec.split_at(index)) + Some(vec.split_off(index)) } } - -#[derive(Debug, Error)] -#[error("Failed to parse type.")] -struct ParseTypeError; - -pub fn parse_type(chars: &[char]) -> Result { - let - loop { - match chars { - ['i','8'] => Ok(Type::Integer(IntegerType::I8)), - ['i','1','6'] => Ok(Type::Integer(IntegerType::I16)), - ['i','3','2'] => Ok(Type::Integer(IntegerType::I32)), - ['i','6','4'] => Ok(Type::Integer(IntegerType::I64)), - ['u','8'] => Ok(Type::Integer(IntegerType::I8)), - ['u','1','6'] => Ok(Type::Integer(IntegerType::U16)), - ['u','3','2'] => Ok(Type::Integer(IntegerType::U32)), - ['u','6','4'] => Ok(Type::Integer(IntegerType::U64)), - ['b','o','o','l'] => Ok(Type::Boolean), - ['&', tail @ ..] 
=> - _ => Err(ParseTypeError) - } - } - -} \ No newline at end of file diff --git a/src/temporary_intrinsics.rs b/src/temporary_intrinsics.rs new file mode 100644 index 0000000..a40996b --- /dev/null +++ b/src/temporary_intrinsics.rs @@ -0,0 +1,3 @@ +pub unsafe fn inline(_node: NonNull) -> NonNull { + todo!() +} \ No newline at end of file diff --git a/test.txt b/test.txt index e69de29..7ef9859 100644 --- a/test.txt +++ b/test.txt @@ -0,0 +1,9 @@ +a := b - c + d + +c + d @ x +b - x @ y +a := z @ null + +a := b - c + d @ null + +a := b - c + d @ null \ No newline at end of file