-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmutator.py
executable file
·210 lines (169 loc) · 7.46 KB
/
mutator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
from random import randint, choice, shuffle, random
import re
import subprocess
import os
class Mutator:
    """
    Mutate the PHP code of a PHPT (PHP Test) file, typically its --FILE-- section.

    Each mutation rule fires with a small, per-rule probability and, when it
    fires, rewrites exactly one randomly chosen token of its kind:
    - Arithmetic, assignment and logical operators are swapped for another
      operator of the same family.
    - Integer literals become boundary values: -1, 0, PHP_INT_MAX, PHP_INT_MIN, ...
    - String literals become a random byte, NULL, an empty string, ...
    - Variables are replaced by another variable used in the same code.

    Args:
        rates: Optional mapping of rule name (a key of DEFAULT_RATES) to the
            probability in [0, 1] that the rule fires on one call. Rules that
            are not listed keep their default probability.

    Raises:
        ValueError: If `rates` names a rule that does not exist.
    """

    # Probability that each mutation rule fires on a single call.
    DEFAULT_RATES = {
        'arith': 0.001,
        'assign': 0.001,
        'logical': 0.001,
        'integer': 0.001,
        'string': 0.01,
        'variable': 0.005,
    }

    def __init__(self, rates=None):
        self.rates = dict(self.DEFAULT_RATES)
        if rates:
            unknown = sorted(set(rates) - set(self.DEFAULT_RATES))
            if unknown:
                raise ValueError(f"unknown mutation rule(s): {', '.join(unknown)}")
            self.rates.update(rates)

    def extract_sec(self, test, section):
        """
        Extract a specific section from the PHPT file, identified by the section header.

        Args:
            test: The full PHPT file content.
            section: The section header to extract (e.g., --FILE--).

        Returns:
            The text between `section` and the next `--NAME--` header (or the
            end of the file), with leading/trailing newlines stripped, or an
            empty string if the section is not found.
        """
        header_pos = test.find(section)
        if header_pos == -1:
            return ""
        start_idx = header_pos + len(section)
        remainder = test[start_idx:]
        next_header = re.search("--([_A-Z]+)--", remainder)
        if next_header is not None:
            remainder = remainder[:next_header.start()]
        return remainder.strip("\n")

    # `mr` means `mutation rule`.
    # Below are the helpers shared by the rules, followed by the mutation
    # rules applied to the PHP code.

    def _skip_rule(self, rule):
        """Return True when the named rule should leave the code untouched this call."""
        rate = self.rates[rule]
        # random() can return exactly 0.0, so a rate of 0 is checked explicitly
        # to guarantee the rule is really disabled.
        return rate <= 0 or random() > rate

    def _mutate_random_match(self, phpcode, pattern, replacements):
        """
        Replace one randomly chosen match of `pattern` with a random replacement.

        The replacement is spliced in at the exact position of the chosen
        match, so identical text elsewhere in the code is never touched.

        Args:
            phpcode: The PHP source to mutate.
            pattern: Regular expression whose whole match is the token to replace.
            replacements: Candidate replacement templates; one that differs
                from the matched text is picked.

        Returns:
            The mutated code, or `phpcode` unchanged when nothing matches.
        """
        matches = list(re.finditer(pattern, phpcode))
        if not matches:
            return phpcode
        target = choice(matches)
        candidates = [rep for rep in replacements if rep != target.group(0)]
        if not candidates:
            return phpcode
        # expand() applies the same template rules as re.sub(), so a
        # replacement such as 'test\0test' still yields a raw NUL byte.
        new_text = target.expand(choice(candidates))
        return phpcode[:target.start()] + new_text + phpcode[target.end():]

    def _mr_arith_operators(self, phpcode):
        """
        Randomly mutate one arithmetic operator: +, -, *, /, %, **.
        With the default rate, 99.9% of calls return the original PHP code.
        """
        if self._skip_rule('arith'):
            return phpcode
        # `**` must come first, otherwise it would be seen as two `*` tokens.
        target_regex = r'\*\*|[-+*/%]'
        replacements = ['+', '-', '*', '/', '%', '**']
        return self._mutate_random_match(phpcode, target_regex, replacements)

    def _mr_assign_operators(self, phpcode):
        """
        Randomly mutate one assignment operator: +=, -=, *=, /=, %=.
        With the default rate, 99.9% of calls return the original PHP code.
        """
        if self._skip_rule('assign'):
            return phpcode
        target_regex = r'\+=|-=|\*=|/=|%='
        replacements = ['+=', '-=', '*=', '/=', '%=']
        return self._mutate_random_match(phpcode, target_regex, replacements)

    def _mr_logical_operators(self, phpcode):
        """
        Randomly mutate one logical operator: 'and', 'or', 'xor', '&&', '||'.
        With the default rate, 99.9% of calls return the original PHP code.
        """
        if self._skip_rule('logical'):
            return phpcode
        # Word boundaries keep `and`/`or` inside identifiers (e.g. rand) intact.
        target_regex = r'\band\b|\bor\b|\bxor\b|&&|\|\|'
        replacements = ['and', 'or', 'xor', '&&', '||']
        return self._mutate_random_match(phpcode, target_regex, replacements)

    def _mr_integer(self, phpcode):
        """
        Randomly mutate one integer literal to a special boundary value like -1, 0, PHP_INT_MAX, etc.
        With the default rate, 99.9% of calls return the original PHP code.
        """
        if self._skip_rule('integer'):
            return phpcode
        # Integers in decimal, octal, or hexadecimal that are not part of an
        # identifier or a longer number.
        target_regex = r'(?<![a-zA-Z0-9_])(?:0x[0-9a-fA-F]+|0[0-7]*|[1-9][0-9]*|0)(?![a-zA-Z0-9_])'
        replacements = ['-1', '0', 'PHP_INT_MAX', 'PHP_INT_MIN', 'PHP_FLOAT_MIN', 'PHP_FLOAT_MAX', 'NULL', 'NAN', 'INF']
        return self._mutate_random_match(phpcode, target_regex, replacements)

    def _mr_string(self, phpcode):
        """
        Randomly mutate one string literal to a special value like a random byte or special encoding.
        With the default rate, 99% of calls return the original PHP code.
        """
        if self._skip_rule('string'):
            return phpcode
        # Single- and double-quoted strings. The whole literal, quotes
        # included, is replaced because the replacements bring their own quotes.
        target_regex = r"'([^'\\]+(\\.[^'\\]*)*)'|\"([^\"\\]+(\\.[^\"\\]*)*)\""
        replacements = [f"'{chr(randint(0, 255))}'", 'NULL', "''", "'?~K?~U'", "'test\\0test'"]
        return self._mutate_random_match(phpcode, target_regex, replacements)

    def _mr_variable(self, phpcode):
        """
        Randomly mutate a variable by replacing some of its occurrences with another variable.
        With the default rate, 99.5% of calls return the original PHP code.
        """
        if self._skip_rule('variable'):
            return phpcode
        variables = re.findall(r'\$\w+', phpcode)
        if not variables:
            return phpcode
        victim = choice(variables)
        # Picking from the full list keeps frequently used variables more
        # likely; the victim itself is excluded so the mutation is never a no-op.
        others = [name for name in variables if name != victim]
        if not others:
            return phpcode
        replace = choice(others)
        # (?!\w) stops `$a` from matching the prefix of a longer name like `$ab`.
        occurrences = [m.start() for m in re.finditer(re.escape(victim) + r'(?!\w)', phpcode)]
        if not occurrences:
            return phpcode
        # Replace a random, non-empty subset of the occurrences.
        shuffle(occurrences)
        selected = sorted(occurrences[:randint(1, len(occurrences))])
        result = []
        last_index = 0
        for start in selected:
            result.append(phpcode[last_index:start])
            result.append(replace)
            last_index = start + len(victim)
        result.append(phpcode[last_index:])
        return ''.join(result)

    def mutate(self, phpcode):
        """
        Apply every mutation rule once, in a fixed order.

        Args:
            phpcode: The PHP source to mutate.

        Returns:
            The PHP source after each rule had its chance to fire; with the
            default rates this is usually the unchanged input.
        """
        phpcode = self._mr_arith_operators(phpcode)
        phpcode = self._mr_assign_operators(phpcode)
        phpcode = self._mr_logical_operators(phpcode)
        phpcode = self._mr_integer(phpcode)
        phpcode = self._mr_string(phpcode)
        phpcode = self._mr_variable(phpcode)
        return phpcode