From be07bf5824e195add85e4240680446f8c1b1e903 Mon Sep 17 00:00:00 2001 From: Glauber Silva Date: Mon, 6 Jan 2025 17:01:36 -0300 Subject: [PATCH 1/9] refactor: decode string before validate --- src/Helpers/Utils.php | 45 +++++++++++++++++++++++ tests/Unit/Helpers/UtilsTest.php | 63 +++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 2 deletions(-) diff --git a/src/Helpers/Utils.php b/src/Helpers/Utils.php index 15412d474a..da748c52ef 100644 --- a/src/Helpers/Utils.php +++ b/src/Helpers/Utils.php @@ -129,6 +129,45 @@ public static function removeBackslashes($data) return $data; } + /** + * @unreleased + */ + public static function recursiveUrlDecode($data) + { + $decoded = urldecode($data); + + return $decoded === $data ? $data : self::recursiveUrlDecode($decoded); + } + + /** + * @unreleased + */ + public static function recursiveBase64Decode($data) + { + $decodedData = base64_decode($data); + if ($decodedData !== false && base64_encode($decodedData) === $data) { + // If the decoded string is a valid Base64 string, decode again + return self::recursiveBase64Decode($decodedData); + } + + return $data; + } + + /** + * @unreleased + */ + public static function recursiveHexDecode($data) + { + $decodedData = hex2bin($data); + if ($decodedData !== false && bin2hex($decodedData) === $data) { + // If the decoded string is a valid Hex string, decode again + return self::recursiveHexDecode($decodedData); + } + + return $data; + } + + /** * The regular expression attempts to capture the basic structure of all data types that can be serialized by PHP. * @@ -141,6 +180,12 @@ public static function containsSerializedDataRegex($data): bool return false; } + $data = self::recursiveUrlDecode($data); + $data = self::recursiveBase64Decode($data); + $data = self::recursiveHexDecode($data); + + $data = preg_replace('/[^a-zA-Z0-9:{};"\'\.\[\]\(\),]/', '', $data); + $pattern = '/ (a:\d+:\{.*\}) | # Matches arrays (e.g: a:2:{i:0;s:5:"hello";i:1;i:42;}) (O:\d+:"[^"]+":\{.*\}) | # Matches objects (e.g: O:8:"stdClass":1:{s:4:"name";s:5:"James";}) diff --git a/tests/Unit/Helpers/UtilsTest.php b/tests/Unit/Helpers/UtilsTest.php index 68277d1ba5..9fe2e5ce3d 100644 --- a/tests/Unit/Helpers/UtilsTest.php +++ b/tests/Unit/Helpers/UtilsTest.php @@ -89,8 +89,6 @@ public function serializedDataProvider(): array { return [ [serialize('bar'), true], - ['\\' . serialize('backslash-bypass'), true], - ['\\\\' . serialize('double-backslash-bypass'), true], ['foo', false], [serialize('qux'), true], ['bar', false], @@ -103,6 +101,67 @@ public function serializedDataProvider(): array ['Lorem ipsum b:1; dolor sit amet', true], // boolean ['Lorem ipsum d:3.14; dolor sit amet', true], // float ['Lorem ipsum N; dolor sit amet', true], // NULL + // Strings with special characters (e.g: emojis, spaces, control characters) that are not part of a predefined set of safe characters for serialized data structures (used to try to bypass the validations) + [ + // emojis bypass sample + 'O😼:8:"stdClass":1:{s😼:4:"name";s😼:5:"James";}', + true, + ], + [ + // spaces bypass sample + 'O :8:"stdClass":1:{s :4:"name";s :5:"James";}', + true, + ], + // BYPASS WITH SIMPLE METHODS + [ + // backslash + '\\' . serialize('backslash-bypass'), + true, + ], + [ + // double-backslash + '\\\\' . serialize('double-backslash-bypass'), + true, + ], + // BYPASS WITH ENCODING STRING METHOD #1 - URL-encoded + [ + // Single encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} + 'O%3A8%3A%22stdClass%22%3A1%3A%7Bs%3A4%3A%22name%22%3Bs%3A5%3A%22James%22%3B%7D', + true, + ], + [ + // Double encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} + 'O%253A8%253A%2522stdClass%2522%253A1%253A%257Bs%253A4%253A%2522name%2522%253Bs%253A5%253A%2522James%2522%253B%257D', + true, + ], + // BYPASS WITH ENCODING STRING METHOD #2 - Base64 + [ + // Single encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} + 'Tzo4OiJzdGRDbGFzcyI6MTp7czo0OiJuYW1lIjtzOjU6IkphbWVzIjt9', + true, + ], + [ + // Double encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} + 'VHp6MDpPOmp6I3N0ZENsYXNzIjoxOntzOjQ6Im5hbWUiO3M6NToiSmFtZXMiO31z', + true, + ], + // BYPASS WITH ENCODING STRING METHOD #3 - Hex-encoded + [ + // Single encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} + '4f3a383a22737464436c617373223a313a7b733a343a226e616d65223b733a353a224a616d6573223b7d', + true, + ], + [ + // Double encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} + '346633613833613a323237333634343336643661373332223a313a376233343a313a3763363a373233333634353a343a66337a343a323233643634663a373236333a666537333a393a6666372e7a3a313b', + true, + ], + // Real-world samples using multiple obfuscation techniques together + [ + // O😼:5:"TCPDF":2:{s😼:12:" * imagekeys";a😼:1:{i😼:0;s😼:34:"/tmp/../var/www/html/wp-config.php";}s😼:10:" * file_id";s😼:32:"202cb962ac59075b964b07152d234b70";} + 'O%25F0%259F%2598%25BC:5:%22TCPDF%22:2:{s%25F0%259F%2598%25BC:12:%22%00*%00imagekeys%22;a%25F0%259F%2598%25BC:1:{i%25F0%259F%2598%25BC:0;s%25F0%259F%2598%25BC:34:%22/tmp/../var/www/html/wp-config.php%22;}s%25F0%259F%2598%25BC:10:%22%00*%00file_id%22;s%25F0%259F%2598%25BC:32:%22202cb962ac59075b964b07152d234b70%22;}', + true, + ], ]; } } From 269113c3e3d1276335cfb1868075bff06ad2afc8 Mon Sep 17 00:00:00 2001 From: Glauber Silva Date: Mon, 6 Jan 2025 17:16:13 -0300 Subject: [PATCH 2/9] doc: add unreleased tag --- src/Helpers/Utils.php | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/Helpers/Utils.php b/src/Helpers/Utils.php index da748c52ef..d64e86edba 100644 --- a/src/Helpers/Utils.php +++ b/src/Helpers/Utils.php @@ -167,10 +167,23 @@ public static function recursiveHexDecode($data) return $data; } + /** + * @unreleased + */ + public static function decodeString(string $data): string + { + $data = self::recursiveBase64Decode($data); + $data = self::recursiveHexDecode($data); + $data = self::recursiveUrlDecode($data); + + return $data; + } + /** * The regular expression attempts to capture the basic structure of all data types that can be serialized by PHP. * + * @unreleased Decode the string and remove any character not allowed in a serialized string * @since 3.19.3 Support all types of serialized data instead of only objects and arrays * @since 3.17.2 */ @@ -180,10 +193,12 @@ public static function containsSerializedDataRegex($data): bool return false; } - $data = self::recursiveUrlDecode($data); - $data = self::recursiveBase64Decode($data); - $data = self::recursiveHexDecode($data); + $data = self::decodeString($data); + /** + * This regular expression removes any special character that is not: + * a Letter (a-zA-Z), number (0-9), or any of the characters {}, :, ;, ", ', ., [, ], (, ), , + */ $data = preg_replace('/[^a-zA-Z0-9:{};"\'\.\[\]\(\),]/', '', $data); $pattern = '/ From 78a8ef61dde4a5c3117705049a57fac710d4b98c Mon Sep 17 00:00:00 2001 From: Glauber Silva Date: Mon, 6 Jan 2025 17:19:02 -0300 Subject: [PATCH 3/9] doc: add unreleased tag --- tests/Unit/Helpers/UtilsTest.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/Unit/Helpers/UtilsTest.php b/tests/Unit/Helpers/UtilsTest.php index 9fe2e5ce3d..318d9234cb 100644 --- a/tests/Unit/Helpers/UtilsTest.php +++ b/tests/Unit/Helpers/UtilsTest.php @@ -82,6 +82,7 @@ public function testMaybeSafeUnserialize($data, bool $expected) } /** + * @unreleased Test encoded strings and strings with special characters * @since 3.19.3 Test all types of serialized data * @since 3.17.2 */ From 999058d8125eb3887e4c24582f04c2a16076f586 Mon Sep 17 00:00:00 2001 From: Glauber Silva Date: Mon, 6 Jan 2025 18:30:47 -0300 Subject: [PATCH 4/9] doc: update comment --- tests/Unit/Helpers/UtilsTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Unit/Helpers/UtilsTest.php b/tests/Unit/Helpers/UtilsTest.php index 318d9234cb..5e3915fa84 100644 --- a/tests/Unit/Helpers/UtilsTest.php +++ b/tests/Unit/Helpers/UtilsTest.php @@ -159,7 +159,7 @@ public function serializedDataProvider(): array ], // Real-world samples using multiple obfuscation techniques together [ - // O😼:5:"TCPDF":2:{s😼:12:" * imagekeys";a😼:1:{i😼:0;s😼:34:"/tmp/../var/www/html/wp-config.php";}s😼:10:" * file_id";s😼:32:"202cb962ac59075b964b07152d234b70";} + // Double URL-encoded for O😼:5:"TCPDF":2:{s😼:12:" * imagekeys";a😼:1:{i😼:0;s😼:34:"/tmp/../var/www/html/wp-config.php";}s😼:10:" * file_id";s😼:32:"202cb962ac59075b964b07152d234b70";} 'O%25F0%259F%2598%25BC:5:%22TCPDF%22:2:{s%25F0%259F%2598%25BC:12:%22%00*%00imagekeys%22;a%25F0%259F%2598%25BC:1:{i%25F0%259F%2598%25BC:0;s%25F0%259F%2598%25BC:34:%22/tmp/../var/www/html/wp-config.php%22;}s%25F0%259F%2598%25BC:10:%22%00*%00file_id%22;s%25F0%259F%2598%25BC:32:%22202cb962ac59075b964b07152d234b70%22;}', true, ], From 51ae9a5d39680acf1cd3e91e66c9daef657ba1a5 Mon Sep 17 00:00:00 2001 From: Glauber Silva Date: Mon, 6 Jan 2025 21:47:52 -0300 Subject: [PATCH 5/9] refactor: simplify logic --- src/Helpers/Utils.php | 43 +------------------------------- tests/Unit/Helpers/UtilsTest.php | 26 ++----------------- 2 files changed, 3 insertions(+), 66 deletions(-) diff --git a/src/Helpers/Utils.php b/src/Helpers/Utils.php index d64e86edba..d389843791 100644 --- a/src/Helpers/Utils.php +++ b/src/Helpers/Utils.php @@ -139,47 +139,6 @@ public static function recursiveUrlDecode($data) return $decoded === $data ? $data : self::recursiveUrlDecode($decoded); } - /** - * @unreleased - */ - public static function recursiveBase64Decode($data) - { - $decodedData = base64_decode($data); - if ($decodedData !== false && base64_encode($decodedData) === $data) { - // If the decoded string is a valid Base64 string, decode again - return self::recursiveBase64Decode($decodedData); - } - - return $data; - } - - /** - * @unreleased - */ - public static function recursiveHexDecode($data) - { - $decodedData = hex2bin($data); - if ($decodedData !== false && bin2hex($decodedData) === $data) { - // If the decoded string is a valid Hex string, decode again - return self::recursiveHexDecode($decodedData); - } - - return $data; - } - - /** - * @unreleased - */ - public static function decodeString(string $data): string - { - $data = self::recursiveBase64Decode($data); - $data = self::recursiveHexDecode($data); - $data = self::recursiveUrlDecode($data); - - return $data; - } - - /** * The regular expression attempts to capture the basic structure of all data types that can be serialized by PHP. * @@ -193,7 +152,7 @@ public static function containsSerializedDataRegex($data): bool return false; } - $data = self::decodeString($data); + $data = self::recursiveUrlDecode($data); /** * This regular expression removes any special character that is not: diff --git a/tests/Unit/Helpers/UtilsTest.php b/tests/Unit/Helpers/UtilsTest.php index 5e3915fa84..c05c771d3b 100644 --- a/tests/Unit/Helpers/UtilsTest.php +++ b/tests/Unit/Helpers/UtilsTest.php @@ -113,7 +113,7 @@ public function serializedDataProvider(): array 'O :8:"stdClass":1:{s :4:"name";s :5:"James";}', true, ], - // BYPASS WITH SIMPLE METHODS + // Bypass with simple methods [ // backslash '\\' . serialize('backslash-bypass'), @@ -124,7 +124,7 @@ public function serializedDataProvider(): array '\\\\' . serialize('double-backslash-bypass'), true, ], - // BYPASS WITH ENCODING STRING METHOD #1 - URL-encoded + // Bypass with encoding string method - URL-encoded [ // Single encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} 'O%3A8%3A%22stdClass%22%3A1%3A%7Bs%3A4%3A%22name%22%3Bs%3A5%3A%22James%22%3B%7D', @@ -135,28 +135,6 @@ public function serializedDataProvider(): array 'O%253A8%253A%2522stdClass%2522%253A1%253A%257Bs%253A4%253A%2522name%2522%253Bs%253A5%253A%2522James%2522%253B%257D', true, ], - // BYPASS WITH ENCODING STRING METHOD #2 - Base64 - [ - // Single encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} - 'Tzo4OiJzdGRDbGFzcyI6MTp7czo0OiJuYW1lIjtzOjU6IkphbWVzIjt9', - true, - ], - [ - // Double encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} - 'VHp6MDpPOmp6I3N0ZENsYXNzIjoxOntzOjQ6Im5hbWUiO3M6NToiSmFtZXMiO31z', - true, - ], - // BYPASS WITH ENCODING STRING METHOD #3 - Hex-encoded - [ - // Single encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} - '4f3a383a22737464436c617373223a313a7b733a343a226e616d65223b733a353a224a616d6573223b7d', - true, - ], - [ - // Double encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";} - '346633613833613a323237333634343336643661373332223a313a376233343a313a3763363a373233333634353a343a66337a343a323233643634663a373236333a666537333a393a6666372e7a3a313b', - true, - ], // Real-world samples using multiple obfuscation techniques together [ // Double URL-encoded for O😼:5:"TCPDF":2:{s😼:12:" * imagekeys";a😼:1:{i😼:0;s😼:34:"/tmp/../var/www/html/wp-config.php";}s😼:10:" * file_id";s😼:32:"202cb962ac59075b964b07152d234b70";} From a7a849ca092cf72179f740bb30aa00b8fa4fbbd7 Mon Sep 17 00:00:00 2001 From: Glauber Silva Date: Tue, 7 Jan 2025 11:52:41 -0300 Subject: [PATCH 6/9] refactor: remove redundant escape --- src/Helpers/Utils.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Helpers/Utils.php b/src/Helpers/Utils.php index d389843791..a396ad209b 100644 --- a/src/Helpers/Utils.php +++ b/src/Helpers/Utils.php @@ -158,11 +158,11 @@ public static function containsSerializedDataRegex($data): bool * This regular expression removes any special character that is not: * a Letter (a-zA-Z), number (0-9), or any of the characters {}, :, ;, ", ', ., [, ], (, ), , */ - $data = preg_replace('/[^a-zA-Z0-9:{};"\'\.\[\]\(\),]/', '', $data); + $data = preg_replace('/[^a-zA-Z0-9:{};"\'.\[\](),]/', '', $data); $pattern = '/ - (a:\d+:\{.*\}) | # Matches arrays (e.g: a:2:{i:0;s:5:"hello";i:1;i:42;}) - (O:\d+:"[^"]+":\{.*\}) | # Matches objects (e.g: O:8:"stdClass":1:{s:4:"name";s:5:"James";}) + (a:\d+:\{.*}) | # Matches arrays (e.g: a:2:{i:0;s:5:"hello";i:1;i:42;}) + (O:\d+:"[^"]+":\{.*}) | # Matches objects (e.g: O:8:"stdClass":1:{s:4:"name";s:5:"James";}) (s:\d+:"[^"]*";) | # Matches strings (e.g: s:5:"hello";) (i:\d+;) | # Matches integers (e.g: i:42;) (b:[01];) | # Matches booleans (e.g: b:1; or b:0;) From 0b034c5b5df1c7f19321a39059633c4f74fea9b0 Mon Sep 17 00:00:00 2001 From: Glauber Silva Date: Tue, 7 Jan 2025 11:55:36 -0300 Subject: [PATCH 7/9] doc: add comment --- src/Helpers/Utils.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Helpers/Utils.php b/src/Helpers/Utils.php index a396ad209b..3a1d5d7235 100644 --- a/src/Helpers/Utils.php +++ b/src/Helpers/Utils.php @@ -130,9 +130,11 @@ public static function removeBackslashes($data) } /** + * Decode strings recursively to prevent double (or more) encoded strings + * * @unreleased */ - public static function recursiveUrlDecode($data) + public static function recursiveUrlDecode(string $data): string { $decoded = urldecode($data); From e2f18748a0a1393c80400f6348024ad46c3830c5 Mon Sep 17 00:00:00 2001 From: Glauber Silva Date: Tue, 7 Jan 2025 12:02:16 -0300 Subject: [PATCH 8/9] test: change string sample --- tests/Unit/Helpers/UtilsTest.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/Unit/Helpers/UtilsTest.php b/tests/Unit/Helpers/UtilsTest.php index c05c771d3b..069f1b0f4a 100644 --- a/tests/Unit/Helpers/UtilsTest.php +++ b/tests/Unit/Helpers/UtilsTest.php @@ -135,10 +135,10 @@ public function serializedDataProvider(): array 'O%253A8%253A%2522stdClass%2522%253A1%253A%257Bs%253A4%253A%2522name%2522%253Bs%253A5%253A%2522James%2522%253B%257D', true, ], - // Real-world samples using multiple obfuscation techniques together + // Sample using multiple obfuscation techniques together [ - // Double URL-encoded for O😼:5:"TCPDF":2:{s😼:12:" * imagekeys";a😼:1:{i😼:0;s😼:34:"/tmp/../var/www/html/wp-config.php";}s😼:10:" * file_id";s😼:32:"202cb962ac59075b964b07152d234b70";} - 'O%25F0%259F%2598%25BC:5:%22TCPDF%22:2:{s%25F0%259F%2598%25BC:12:%22%00*%00imagekeys%22;a%25F0%259F%2598%25BC:1:{i%25F0%259F%2598%25BC:0;s%25F0%259F%2598%25BC:34:%22/tmp/../var/www/html/wp-config.php%22;}s%25F0%259F%2598%25BC:10:%22%00*%00file_id%22;s%25F0%259F%2598%25BC:32:%22202cb962ac59075b964b07152d234b70%22;}', + // Double URL-encoded for O😼:5:"CLASS":2:{s😼:12:" * imagekeys";a😼:1:{i😼:0;s😼:31:"/server/path/file-to-delete.php";}s😼:10:" * file_id";s😼:32:"202cb962ac59075b964b07152d234b70";} + 'O%25F0%259F%2598%25BC%253A5%253A%2522CLASS%2522%253A2%253A%257Bs%25F0%259F%2598%25BC%253A12%253A%2522%2B%252A%2Bimagekeys%2522%253Ba%25F0%259F%2598%25BC%253A1%253A%257Bi%25F0%259F%2598%25BC%253A0%253Bs%25F0%259F%2598%25BC%253A31%253A%2522%252Fserver%252Fpath%252Ffile-to-delete.php%2522%253B%257Ds%25F0%259F%2598%25BC%253A10%253A%2522%2B%252A%2Bfile_id%2522%253Bs%25F0%259F%2598%25BC%253A32%253A%2522202cb962ac59075b964b07152d234b70%2522%253B%257D', true, ], ]; From ca2f8b3775daad7da3465adf6083088658800f51 Mon Sep 17 00:00:00 2001 From: Glauber Silva Date: Tue, 7 Jan 2025 12:09:14 -0300 Subject: [PATCH 9/9] tests: simplify sample and add a new one --- tests/Unit/Helpers/UtilsTest.php | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/Unit/Helpers/UtilsTest.php b/tests/Unit/Helpers/UtilsTest.php index 069f1b0f4a..8c6abf8c36 100644 --- a/tests/Unit/Helpers/UtilsTest.php +++ b/tests/Unit/Helpers/UtilsTest.php @@ -135,10 +135,15 @@ public function serializedDataProvider(): array 'O%253A8%253A%2522stdClass%2522%253A1%253A%257Bs%253A4%253A%2522name%2522%253Bs%253A5%253A%2522James%2522%253B%257D', true, ], - // Sample using multiple obfuscation techniques together + // Samples using multiple obfuscation techniques together [ - // Double URL-encoded for O😼:5:"CLASS":2:{s😼:12:" * imagekeys";a😼:1:{i😼:0;s😼:31:"/server/path/file-to-delete.php";}s😼:10:" * file_id";s😼:32:"202cb962ac59075b964b07152d234b70";} - 'O%25F0%259F%2598%25BC%253A5%253A%2522CLASS%2522%253A2%253A%257Bs%25F0%259F%2598%25BC%253A12%253A%2522%2B%252A%2Bimagekeys%2522%253Ba%25F0%259F%2598%25BC%253A1%253A%257Bi%25F0%259F%2598%25BC%253A0%253Bs%25F0%259F%2598%25BC%253A31%253A%2522%252Fserver%252Fpath%252Ffile-to-delete.php%2522%253B%257Ds%25F0%259F%2598%25BC%253A10%253A%2522%2B%252A%2Bfile_id%2522%253Bs%25F0%259F%2598%25BC%253A32%253A%2522202cb962ac59075b964b07152d234b70%2522%253B%257D', + // Single URL-encoded for O😼:8:"stdClass":1:{s😼:4:"name";s😼:5:"James";} + 'O%F0%9F%98%BC%3A8%3A%22stdClass%22%3A1%3A%7Bs%F0%9F%98%BC%3A4%3A%22name%22%3Bs%F0%9F%98%BC%3A5%3A%22James%22%3B%7D', + true, + ], + [ + // Double URL-encoded for O😼:8:"stdClass":1:{s😼:4:"name";s😼:5:"James";} + 'O%25F0%259F%2598%25BC%253A8%253A%2522stdClass%2522%253A1%253A%257Bs%25F0%259F%2598%25BC%253A4%253A%2522name%2522%253Bs%25F0%259F%2598%25BC%253A5%253A%2522James%2522%253B%257D', true, ], ];