Skip to content

Commit fb5f0a1

Browse files
Fix TypeError when a variable name matches a builtin python function (#558)
* Fix thrown TypeError when a variable name matches a builtin Python function

  Example: A dataset with a column named 'exec' failed with:

      ValueError: Error from parse_expr with transformed code: "(Float ('86.76248' )-exec )"
      ... snip ...
      TypeError: unsupported operand type(s) for -: 'Float' and 'builtin_function_or_method'

* Ensure backwards compatibility for `pysr2sympy` and use same method
* Fix potential issue with list ordering
* Combine builtin variable names test with noisy data test
* Fix builtin variable names test

---------

Co-authored-by: MilesCranmer <miles.cranmer@gmail.com>
1 parent efffd9b commit fb5f0a1

File tree

3 files changed

+20
-4
lines changed

3 files changed

+20
-4
lines changed

pysr/export_sympy.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,29 @@
5757
}
5858

5959

60+
def create_sympy_symbols_map(
61+
feature_names_in: List[str],
62+
) -> Dict[str, sympy.Symbol]:
63+
return {variable: sympy.Symbol(variable) for variable in feature_names_in}
64+
65+
6066
def create_sympy_symbols(
6167
feature_names_in: List[str],
6268
) -> List[sympy.Symbol]:
6369
return [sympy.Symbol(variable) for variable in feature_names_in]
6470

6571

6672
def pysr2sympy(
67-
equation: str, *, extra_sympy_mappings: Optional[Dict[str, Callable]] = None
73+
equation: str,
74+
*,
75+
feature_names_in: Optional[List[str]] = None,
76+
extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
6877
):
78+
if feature_names_in is None:
79+
feature_names_in = []
6980
local_sympy_mappings = {
70-
**(extra_sympy_mappings if extra_sympy_mappings else {}),
81+
**create_sympy_symbols_map(feature_names_in),
82+
**(extra_sympy_mappings if extra_sympy_mappings is not None else {}),
7183
**sympy_mappings,
7284
}
7385

pysr/sr.py

+1
Original file line numberDiff line numberDiff line change
@@ -2226,6 +2226,7 @@ def get_hof(self):
22262226
for _, eqn_row in output.iterrows():
22272227
eqn = pysr2sympy(
22282228
eqn_row["equation"],
2229+
feature_names_in=self.feature_names_in_,
22292230
extra_sympy_mappings=self.extra_sympy_mappings,
22302231
)
22312232
sympy_format.append(eqn)

pysr/test/test.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def test_warm_start_set_at_init(self):
272272
regressor = PySRRegressor(warm_start=True, max_evals=10)
273273
regressor.fit(self.X, y)
274274

275-
def test_noisy(self):
275+
def test_noisy_builtin_variable_names(self):
276276
y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
277277
model = PySRRegressor(
278278
# Test that passing a single operator works:
@@ -289,9 +289,12 @@ def test_noisy(self):
289289
model.set_params(model_selection="best")
290290
# Also try without a temp equation file:
291291
model.set_params(temp_equation_file=False)
292-
model.fit(self.X, y)
292+
# We also test builtin variable names
293+
model.fit(self.X, y, variable_names=["exec", "hash", "x3", "x4", "x5"])
293294
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
294295
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
296+
self.assertIn("exec", model.latex()[0])
297+
self.assertIn("hash", model.latex()[1])
295298

296299
def test_pandas_resample_with_nested_constraints(self):
297300
X = pd.DataFrame(

0 commit comments

Comments
 (0)