From 59bfa8df4071b5efe28217b1bd3eab2c285afa70 Mon Sep 17 00:00:00 2001 From: Jack Yansong Li Date: Tue, 4 Feb 2025 20:06:22 +0800 Subject: [PATCH] added rich-string%split in GoldfishLang.tmu --- GoldfishLang.tmu | 80 +++++++++++++++++++++++++++++++ goldfish/liii/lang.scm | 26 ++++++++++ tests/goldfish/liii/lang-test.scm | 10 ++++ 3 files changed, 116 insertions(+) diff --git a/GoldfishLang.tmu b/GoldfishLang.tmu index aa1e7aa..88f208f 100644 --- a/GoldfishLang.tmu +++ b/GoldfishLang.tmu @@ -2037,6 +2037,86 @@ (check-catch 'unbound-variable (123:strip-suffix 1)) + + + + + <\goldfish-chunk|goldfish/liii/lang.scm|true|true> + (define (%split sep) + + \ \ ;; 在 %split 内部定义 string-split 作为子函数 + + \ \ (define (string-split str sep) + + \ \ \ \ (let ((sep-len (string-length sep)) + + \ \ \ \ \ \ \ \ \ \ (str-len (string-length str))) + + \ \ \ \ \ \ (cond + + \ \ \ \ \ \ \ ;; 处理分隔符长度为0的特殊情况 + + \ \ \ \ \ \ \ ((= sep-len 0) + + \ \ \ \ \ \ \ \ (map string (string-\list str))) + + \ \ \ \ \ \ \ (else + + \ \ \ \ \ \ \ \ (let loop ((start 0) (result '())) + + \ \ \ \ \ \ \ \ \ \ (if (\= start str-len) + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ (reverse result) + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ (let ((found-at + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (let search ((i start)) + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (and (\= (+ i sep-len) str-len) + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (or (and (string-starts? (substring str i (+ i sep-len)) sep) + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ i) + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (search (+ i 1))))))) + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (if found-at + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (loop (+ found-at sep-len) + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (cons (substring str start found-at) result)) + + \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (reverse (cons (substring str start str-len) result)))))))))) + + \ \ ;; 利用(liii string)定义的 string-split 来处理 data + + \ \ (rich-vector (list-\vector (string-split data sep)))) + + \; + + + <\goldfish-chunk|tests/goldfish/liii/lang-test.scm|true|true> + \; + + (check ((box "da@liii.pro") :split "@") =\ (box (vector "da" "liii.pro"))) + + (check ((box "da@liii.pro") :split ".") =\ (box (vector "da@liii" "pro"))) + + (check (((box "da@liii.pro") :split "@") :collect) =\ (vector "da" "liii.pro")) ;Test for chaining + + (check ((box "test") :split "") =\ (box (vector "t" "e" "s" "t"))) + + (check ((box "aXXbXXcXX") :split "XX") =\ (box (vector "a" "b" "c"))) + + (check ((box "a\|\|b\|\|c") :split "\|\|") =\ (box (vector "a" "b" "c"))) + + (check ((box "XXaXXb") :split "XX") =\ (box (vector "" "a" "b"))) ;分隔符出现在开头 + + (check ((box "你好,欢迎使用Liii STEM") :split ",") =\ (box (vector "你好" "欢迎使用Liii STEM"))) + + \; + + <\scm-chunk|goldfish/liii/lang.scm|true|true> diff --git a/goldfish/liii/lang.scm b/goldfish/liii/lang.scm index d9130ed..3c59064 100644 --- a/goldfish/liii/lang.scm +++ b/goldfish/liii/lang.scm @@ -424,6 +424,32 @@ (if (null? xs) result (apply result xs)))) +(define (%split sep) + ;; 在 %split 内部定义 string-split 作为子函数 + (define (string-split str sep) + (let ((sep-len (string-length sep)) + (str-len (string-length str))) + (cond + ;; 处理分隔符长度为0的特殊情况 + ((= sep-len 0) + (map string (string->list str))) + (else + (let loop ((start 0) (result '())) + (if (>= start str-len) + (reverse result) + (let ((found-at + (let search ((i start)) + (and (<= (+ i sep-len) str-len) + (or (and (string-starts? (substring str i (+ i sep-len)) sep) + i) + (search (+ i 1))))))) + (if found-at + (loop (+ found-at sep-len) + (cons (substring str start found-at) result)) + (reverse (cons (substring str start str-len) result)))))))))) + ;; 利用(liii string)定义的 string-split 来处理 data + (rich-vector (list->vector (string-split data sep)))) + ) (define-case-class rich-list ((data list?)) diff --git a/tests/goldfish/liii/lang-test.scm b/tests/goldfish/liii/lang-test.scm index 883ea69..e5e1a9c 100644 --- a/tests/goldfish/liii/lang-test.scm +++ b/tests/goldfish/liii/lang-test.scm @@ -295,6 +295,16 @@ (check-catch 'wrong-number-of-args ("hello":strip-suffix "llo")) (check-catch 'unbound-variable (123:strip-suffix 1)) + +(check ((box "da@liii.pro") :split "@") => (box (vector "da" "liii.pro"))) +(check ((box "da@liii.pro") :split ".") => (box (vector "da@liii" "pro"))) +(check (((box "da@liii.pro") :split "@") :collect) => (vector "da" "liii.pro")) ;Test for chaining +(check ((box "test") :split "") => (box (vector "t" "e" "s" "t"))) +(check ((box "aXXbXXcXX") :split "XX") => (box (vector "a" "b" "c"))) +(check ((box "a||b||c") :split "||") => (box (vector "a" "b" "c"))) +(check ((box "XXaXXb") :split "XX") => (box (vector "" "a" "b"))) ;分隔符出现在开头 +(check ((box "你好,欢迎使用Liii STEM") :split ",") => (box (vector "你好" "欢迎使用Liii STEM"))) + (check (rich-list :range 1 5) => (box (list 1 2 3 4))) (check (rich-list :range 1 5 2) => (box (list 1 3))) (check (rich-list :range 1 6 2) => (box (list 1 3 5)))