diff --git a/mojo/stdlib/src/builtin/string_literal.mojo b/mojo/stdlib/src/builtin/string_literal.mojo
index f72938df4b..61fb0265a4 100644
--- a/mojo/stdlib/src/builtin/string_literal.mojo
+++ b/mojo/stdlib/src/builtin/string_literal.mojo
@@ -687,58 +687,105 @@ struct StringLiteral(
         """
         return String(elems, sep=self)
 
-    fn split(self, sep: StringSlice, maxsplit: Int = -1) raises -> List[String]:
-        """Split the string literal by a separator.
+    @always_inline
+    fn split(self, sep: StringSlice, maxsplit: Int) raises -> List[String]:
+        """Split the string by a separator.
 
         Args:
             sep: The string to split on.
             maxsplit: The maximum amount of items to split from String.
-                Defaults to unlimited.
 
         Returns:
             A List of Strings containing the input split by the separator.
 
+        Raises:
+            If the separator is empty.
+
         Examples:
+        ```mojo
+        # Splitting with maxsplit
+        _ = "1,2,3".split(",", maxsplit=1) # ['1', '2,3']
+        # Splitting with starting or ending separators
+        _ = ",1,2,3,".split(",", maxsplit=1) # ['', '1,2,3,']
+        ```
+        .
+        """
+        # TODO(#3528): add this example
+        # _ = "123".split("", maxsplit=1) # ['', '123']
+        return _to_string_list(self.as_string_slice().split(sep, maxsplit))
+
+    @always_inline
+    fn split(self, sep: StringSlice) raises -> List[String]:
+        """Split the string by a separator.
 
+        Args:
+            sep: The string to split on.
+
+        Returns:
+            A List of Strings containing the input split by the separator.
+
+        Raises:
+            If the separator is empty.
+
+        Examples:
         ```mojo
         # Splitting a space
         _ = "hello world".split(" ") # ["hello", "world"]
         # Splitting adjacent separators
         _ = "hello,,world".split(",") # ["hello", "", "world"]
+        # Splitting with starting or ending separators
+        _ = ",1,2,3,".split(",") # ['', '1', '2', '3', '']
+        ```
+        .
+        """
+        # TODO(#3528): add this example
+        # _ = "123".split("") # ['', '1', '2', '3', '']
+        return _to_string_list(self.as_string_slice().split(sep, -1))
+
+    @always_inline
+    fn split(self, *, maxsplit: Int) -> List[String]:
+        """Split the string by every Whitespace separator.
+
+        Args:
+            maxsplit: The maximum amount of items to split from String.
+
+        Returns:
+            A List of Strings containing the input split by the separator.
+
+        Examples:
+        ```mojo
         # Splitting with maxsplit
-        _ = "1,2,3".split(",", 1) # ['1', '2,3']
+        _ = "1 2 3".split(maxsplit=1) # ['1', '2 3']
         ```
         .
         """
-        return String(self).split(sep, maxsplit)
+        return _to_string_list(self.as_string_slice().split(maxsplit=maxsplit))
 
-    fn split(self, sep: NoneType = None, maxsplit: Int = -1) -> List[String]:
-        """Split the string literal by every whitespace separator.
+    @always_inline
+    fn split(self, sep: NoneType = None) -> List[String]:
+        """Split the string by every Whitespace separator.
 
         Args:
             sep: None.
-            maxsplit: The maximum amount of items to split from string. Defaults
-                to unlimited.
 
         Returns:
             A List of Strings containing the input split by the separator.
 
         Examples:
-
         ```mojo
         # Splitting an empty string or filled with whitespaces
         _ = " ".split() # []
         _ = "".split() # []
-
         # Splitting a string with leading, trailing, and middle whitespaces
         _ = " hello world ".split() # ["hello", "world"]
         # Splitting adjacent universal newlines:
-        _ = "hello \\t\\n\\v\\f\\r\\x1c\\x1d\\x1e\\x85\\u2028\\u2029world".split()
-        # ["hello", "world"]
+        _ = (
+            "hello \\t\\n\\r\\f\\v\\x1c\\x1d\\x1e\\x85\\u2028\\u2029world"
+        ).split() # ["hello", "world"]
         ```
         .
         """
-        return String(self).split(sep, maxsplit)
+        return _to_string_list(self.as_string_slice().split(sep))
 
     fn splitlines(self, keepends: Bool = False) -> List[String]:
         """Split the string literal at line boundaries. This corresponds to Python's
diff --git a/mojo/stdlib/src/collections/string/string.mojo b/mojo/stdlib/src/collections/string/string.mojo
index 85e121f004..092dd6cfd0 100644
--- a/mojo/stdlib/src/collections/string/string.mojo
+++ b/mojo/stdlib/src/collections/string/string.mojo
@@ -1413,76 +1413,106 @@ struct String(
         return self.as_string_slice().isspace()
 
     # TODO(MSTDL-590): String.split() should return `StringSlice`s.
-    fn split(self, sep: StringSlice, maxsplit: Int = -1) raises -> List[String]:
+    # FIX: #3528
+    @always_inline
+    fn split(self, sep: StringSlice, maxsplit: Int) raises -> List[String]:
         """Split the string by a separator.
 
         Args:
             sep: The string to split on.
             maxsplit: The maximum amount of items to split from String.
-                Defaults to unlimited.
 
         Returns:
             A List of Strings containing the input split by the separator.
 
         Raises:
             If the separator is empty.
 
         Examples:
+        ```mojo
+        # Splitting with maxsplit
+        _ = "1,2,3".split(",", maxsplit=1) # ['1', '2,3']
+        # Splitting with starting or ending separators
+        _ = ",1,2,3,".split(",", maxsplit=1) # ['', '1,2,3,']
+        ```
+        .
+        """
+        # TODO(#3528): add this example
+        # _ = "123".split("", maxsplit=1) # ['', '123']
+        return _to_string_list(self.as_string_slice().split(sep, maxsplit))
+
+    @always_inline
+    fn split(self, sep: StringSlice) raises -> List[String]:
+        """Split the string by a separator.
 
+        Args:
+            sep: The string to split on.
+
+        Returns:
+            A List of Strings containing the input split by the separator.
+
+        Raises:
+            If the separator is empty.
+
+        Examples:
         ```mojo
         # Splitting a space
-        _ = String("hello world").split(" ") # ["hello", "world"]
+        _ = "hello world".split(" ") # ["hello", "world"]
         # Splitting adjacent separators
-        _ = String("hello,,world").split(",") # ["hello", "", "world"]
+        _ = "hello,,world".split(",") # ["hello", "", "world"]
+        # Splitting with starting or ending separators
+        _ = ",1,2,3,".split(",") # ['', '1', '2', '3', '']
+        ```
+        .
+        """
+        # TODO(#3528): add this example
+        # _ = "123".split("") # ['', '1', '2', '3', '']
+        return _to_string_list(self.as_string_slice().split(sep, -1))
+
+    @always_inline
+    fn split(self, *, maxsplit: Int) -> List[String]:
+        """Split the string by every Whitespace separator.
+
+        Args:
+            maxsplit: The maximum amount of items to split from String.
+
+        Returns:
+            A List of Strings containing the input split by the separator.
+
+        Examples:
+        ```mojo
         # Splitting with maxsplit
-        _ = String("1,2,3").split(",", 1) # ['1', '2,3']
+        _ = "1 2 3".split(maxsplit=1) # ['1', '2 3']
         ```
         .
         """
-        return self.as_string_slice().split[sep.mut, sep.origin](
-            sep, maxsplit=maxsplit
-        )
+        return _to_string_list(self.as_string_slice().split(maxsplit=maxsplit))
 
-    fn split(self, sep: NoneType = None, maxsplit: Int = -1) -> List[String]:
+    @always_inline
+    fn split(self, sep: NoneType = None) -> List[String]:
         """Split the string by every Whitespace separator.
 
         Args:
             sep: None.
-            maxsplit: The maximum amount of items to split from String. Defaults
-                to unlimited.
 
         Returns:
             A List of Strings containing the input split by the separator.
 
         Examples:
-
         ```mojo
         # Splitting an empty string or filled with whitespaces
-        _ = String(" ").split() # []
-        _ = String("").split() # []
-
+        _ = " ".split() # []
+        _ = "".split() # []
         # Splitting a string with leading, trailing, and middle whitespaces
-        _ = String(" hello world ").split() # ["hello", "world"]
+        _ = " hello world ".split() # ["hello", "world"]
         # Splitting adjacent universal newlines:
-        _ = String(
-            "hello \\t\\n\\v\\f\\r\\x1c\\x1d\\x1e\\x85\\u2028\\u2029world"
+        _ = (
+            "hello \\t\\n\\r\\f\\v\\x1c\\x1d\\x1e\\x85\\u2028\\u2029world"
         ).split() # ["hello", "world"]
         ```
         .
         """
-
-        # TODO(MSTDL-590): Avoid the need to loop to convert `StringSlice` to
-        # `String` by making `String.split()` return `StringSlice`s.
-        var str_slices = self.as_string_slice()._split_whitespace(
-            maxsplit=maxsplit
-        )
-
-        var output = List[String](capacity=len(str_slices))
-
-        for str_slice in str_slices:
-            output.append(String(str_slice[]))
-
-        return output^
+        return _to_string_list(self.as_string_slice().split(sep))
 
     fn splitlines(self, keepends: Bool = False) -> List[String]:
         """Split the string at line boundaries. This corresponds to Python's
diff --git a/mojo/stdlib/src/collections/string/string_slice.mojo b/mojo/stdlib/src/collections/string/string_slice.mojo
index 7ec78dc993..fe69a28d67 100644
--- a/mojo/stdlib/src/collections/string/string_slice.mojo
+++ b/mojo/stdlib/src/collections/string/string_slice.mojo
@@ -949,22 +949,17 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
     # Methods
     # ===------------------------------------------------------------------===#
 
-    fn split[
-        sep_mut: Bool,
-        sep_origin: Origin[sep_mut], //,
-    ](
-        self,
-        sep: StringSlice[sep_origin],
-        maxsplit: Int = -1,
-    ) raises -> List[
-        String
+    fn _split[
+        sep_mut: Bool, sep_origin: Origin[sep_mut]
+    ](self, sep: StringSlice[sep_origin], maxsplit: Int = -1) raises -> List[
+        Self
     ]:
         """Split the string by a separator.
 
         Parameters:
             sep_mut: Mutability of the `sep` string slice.
             sep_origin: Origin of the `sep` string slice.
 
         Args:
             sep: The string to split on.
             maxsplit: The maximum amount of items to split from String.
@@ -975,20 +970,8 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
 
         Raises:
             If the separator is empty.
-
-        Examples:
-
-        ```mojo
-        # Splitting a space
-        _ = StringSlice("hello world").split(" ") # ["hello", "world"]
-        # Splitting adjacent separators
-        _ = StringSlice("hello,,world").split(",") # ["hello", "", "world"]
-        # Splitting with maxsplit
-        _ = StringSlice("1,2,3").split(",", 1) # ['1', '2,3']
-        ```
-        .
         """
-        var output = List[String]()
+        var output = List[Self]()
 
         var str_byte_len = self.byte_length() - 1
         var lhs = 0
@@ -998,60 +981,28 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
         if sep_len == 0:
             raise Error("Separator cannot be empty.")
         if str_byte_len < 0:
-            output.append(String(""))
+            output.append(rebind[Self]("".as_string_slice()))
 
         while lhs <= str_byte_len:
             rhs = self.find(sep, lhs)
             if rhs == -1:
-                output.append(String(self[lhs:]))
+                output.append(self[lhs:])
                 break
 
             if maxsplit > -1:
                 if items == maxsplit:
-                    output.append(String(self[lhs:]))
+                    output.append(self[lhs:])
                     break
                 items += 1
 
-            output.append(String(self[lhs:rhs]))
+            output.append(self[lhs:rhs])
             lhs = rhs + sep_len
 
         if self.endswith(sep) and (len(output) <= maxsplit or maxsplit == -1):
-            output.append(String(""))
+            output.append(rebind[Self]("".as_string_slice()))
 
         return output^
 
-    fn split(
-        self, sep: NoneType = None, maxsplit: Int = -1
-    ) -> List[StringSlice[origin]]:
-        """Split the string by every Whitespace separator.
-
-        Args:
-            sep: None.
-            maxsplit: The maximum amount of items to split from String. Defaults
-                to unlimited.
-
-        Returns:
-            A List of Strings containing the input split by the separator.
-
-        Examples:
-
-        ```mojo
-        # Splitting an empty string or filled with whitespaces
-        _ = StringSlice(" ").split() # []
-        _ = StringSlice("").split() # []
-
-        # Splitting a string with leading, trailing, and middle whitespaces
-        _ = StringSlice(" hello world ").split() # ["hello", "world"]
-        # Splitting adjacent universal newlines:
-        _ = StringSlice(
-            "hello \\t\\n\\v\\f\\r\\x1c\\x1d\\x1e\\x85\\u2028\\u2029world"
-        ).split() # ["hello", "world"]
-        ```
-        .
-        """
-
-        return self._split_whitespace()
-
     fn _split_whitespace(self, maxsplit: Int = -1) -> List[StringSlice[origin]]:
         fn num_bytes(b: UInt8) -> Int:
             var flipped = ~b
@@ -1733,6 +1684,106 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
 
         return True
 
+    @always_inline
+    fn split(self, sep: StringSlice, maxsplit: Int) raises -> List[Self]:
+        """Split the string by a separator.
+
+        Args:
+            sep: The string to split on.
+            maxsplit: The maximum amount of items to split from String.
+
+        Returns:
+            A List of StringSlices containing the input split by the separator.
+
+        Raises:
+            If the separator is empty.
+
+        Examples:
+        ```mojo
+        # Splitting with maxsplit
+        _ = "1,2,3".split(",", maxsplit=1) # ['1', '2,3']
+        # Splitting with starting or ending separators
+        _ = ",1,2,3,".split(",", maxsplit=1) # ['', '1,2,3,']
+        ```
+        .
+        """
+        # TODO(#3528): add this example
+        # _ = "123".split("", maxsplit=1) # ['', '123']
+        return self._split(sep, maxsplit)
+
+    @always_inline
+    fn split(self, sep: StringSlice) raises -> List[Self]:
+        """Split the string by a separator.
+
+        Args:
+            sep: The string to split on.
+
+        Returns:
+            A List of StringSlices containing the input split by the separator.
+
+        Raises:
+            If the separator is empty.
+
+        Examples:
+        ```mojo
+        # Splitting a space
+        _ = "hello world".split(" ") # ["hello", "world"]
+        # Splitting adjacent separators
+        _ = "hello,,world".split(",") # ["hello", "", "world"]
+        # Splitting with starting or ending separators
+        _ = ",1,2,3,".split(",") # ['', '1', '2', '3', '']
+        ```
+        .
+        """
+        # TODO(#3528): add this example
+        # _ = "123".split("") # ['', '1', '2', '3', '']
+        return self._split(sep, -1)
+
+    @always_inline
+    fn split(self, *, maxsplit: Int) -> List[Self]:
+        """Split the string by every Whitespace separator.
+
+        Args:
+            maxsplit: The maximum amount of items to split from String.
+
+        Returns:
+            A List of StringSlices containing the input split by the separator.
+
+        Examples:
+        ```mojo
+        # Splitting with maxsplit
+        _ = "1 2 3".split(maxsplit=1) # ['1', '2 3']
+        ```
+        .
+        """
+        return self._split_whitespace(maxsplit)
+
+    @always_inline
+    fn split(self, sep: NoneType = None) -> List[Self]:
+        """Split the string by every Whitespace separator.
+
+        Args:
+            sep: None.
+
+        Returns:
+            A List of StringSlices containing the input split by the separator.
+
+        Examples:
+        ```mojo
+        # Splitting an empty string or filled with whitespaces
+        _ = " ".split() # []
+        _ = "".split() # []
+        # Splitting a string with leading, trailing, and middle whitespaces
+        _ = " hello world ".split() # ["hello", "world"]
+        # Splitting adjacent universal newlines:
+        _ = (
+            "hello \\t\\n\\r\\f\\v\\x1c\\x1d\\x1e\\x85\\u2028\\u2029world"
+        ).split() # ["hello", "world"]
+        ```
+        .
+        """
+        return self._split_whitespace(-1)
+
     fn isnewline[single_character: Bool = False](self) -> Bool:
         """Determines whether every character in the given StringSlice is a
         python newline character. This corresponds to Python's