-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrows_&_cols.py
39 lines (30 loc) · 1.26 KB
/
rows_&_cols.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
import numpy as np
import timeit
# Seed NumPy's global RNG so the frame built here is reproducible, then
# create a 100000 x 3 frame of random integers in [1, 100) named A, B, C.
np.random.seed(0)
df_large = pd.DataFrame(
    data=np.random.randint(low=1, high=100, size=(100_000, 3)),
    columns=['A', 'B', 'C'],
)
# Repeat the timing experiment with this larger DataFrame
def setup_large(n_rows=100000):
    """Create a fresh random DataFrame and the row-wise function to benchmark.

    Args:
        n_rows: Number of rows to generate. Defaults to 100000, the size
            that was previously hard-coded here.

    Returns:
        A ``(frame, complex_func)`` tuple. ``frame`` has ``n_rows`` rows and
        three columns 'A', 'B', 'C' filled by
        ``np.random.randint(1, 100, ...)``, i.e. integers from 1 to 99.
        ``complex_func`` takes one row (anything indexable by 'A', 'B', 'C')
        and returns a single value.
    """
    # A distinct local name avoids shadowing the module-level ``df_large``.
    frame = pd.DataFrame(np.random.randint(1, 100, size=(n_rows, 3)),
                         columns=['A', 'B', 'C'])

    def complex_func(row):
        """Return A + C when A > B, otherwise B - C."""
        if row['A'] > row['B']:
            return row['A'] + row['C']
        return row['B'] - row['C']

    return frame, complex_func
def apply_function_large():
    """Fill column 'D_apply' by calling the row function once per row.

    The frame and the row function both come from ``setup_large()``, which
    is called inside this function, so any timing taken around this call
    also includes building the frame. Nothing is returned; the frame is
    local to the call.
    """
    frame, row_func = setup_large()
    # axis=1 hands each row to row_func in turn.
    per_row = frame.apply(row_func, axis=1)
    frame['D_apply'] = per_row
def vectorized_function_large():
    """Fill column 'D_vect' with one whole-column ``np.where`` expression.

    Computes A + C where A > B and B - C elsewhere. The frame comes from
    ``setup_large()``, which is called inside this function, so any timing
    taken around this call also includes building the frame. Nothing is
    returned; the frame is local to the call.
    """
    frame, _ = setup_large()
    col_a, col_b, col_c = frame['A'], frame['B'], frame['C']
    frame['D_vect'] = np.where(col_a > col_b, col_a + col_c, col_b - col_c)
# Time one run of each strategy with timeit and print the elapsed seconds.
time_apply_large = timeit.Timer(apply_function_large).timeit(number=1)
time_vect_large = timeit.Timer(vectorized_function_large).timeit(number=1)
print(f'Apply: {time_apply_large}s')
print(f'Vectorization: {time_vect_large}s')