Skip to content

Commit

Permalink
#286 data.apply(lambda x: x) -> data.obj.copy()
Browse files Browse the repository at this point in the history
  • Loading branch information
mdancho84 committed Feb 10, 2024
1 parent 4b01a82 commit 819a5f9
Show file tree
Hide file tree
Showing 12 changed files with 20 additions and 32 deletions.
4 changes: 2 additions & 2 deletions src/pytimetk/core/summarize_by_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,10 +328,10 @@ def _summarize_by_time_polars(
grouped = True
# Extract names from groupby object
groups = data.grouper.names # This can be a list of group names

# Convert the GroupBy object into a Polars DataFrame
df_pl = (
pl.from_pandas(data.apply(lambda x: x))
pl.from_pandas(data.obj.copy())
.group_by(groups, maintain_order=True)
.agg(pl.all().sort_by(date_column))
)
Expand Down
2 changes: 1 addition & 1 deletion src/pytimetk/feature_engineering/diffs.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def _augment_diffs_polars(

if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
# Data is a GroupBy object, get the underlying DataFrame
pandas_df = data.apply(lambda x: x)
pandas_df = data.obj.copy()
elif isinstance(data, pd.DataFrame):
# Data is already a DataFrame
pandas_df = data
Expand Down
11 changes: 4 additions & 7 deletions src/pytimetk/feature_engineering/expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,20 +418,17 @@ def _augment_expanding_polars(
Augments the given dataframe with expanding calculations using the Polars library.
"""

# Create a fresh copy of the data, leaving the original untouched
data_copy = data.copy() if isinstance(data, pd.DataFrame) else data.obj.copy()

# Retrieve the group column names if the input data is a GroupBy object
if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
group_names = data.grouper.names
else:
group_names = None

# Convert data into a Pandas DataFrame format for processing
if isinstance(data_copy, pd.core.groupby.generic.DataFrameGroupBy):
pandas_df = data_copy.apply(lambda x: x)
elif isinstance(data_copy, pd.DataFrame):
pandas_df = data_copy
if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
pandas_df = data.obj.copy()
elif isinstance(data, pd.DataFrame):
pandas_df = data.copy()
else:
raise ValueError("Data must be a Pandas DataFrame or Pandas GroupBy object.")

Expand Down
3 changes: 1 addition & 2 deletions src/pytimetk/feature_engineering/fourier.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def augment_fourier(
raise TypeError(f"Invalid periods specification: type: {type(periods)}. Please use int, tuple, or list.")

if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
data = data.apply(lambda x: x).reset_index(drop=True)
data = data.obj.copy().reset_index(drop=True)

# Reduce memory usage
if reduce_memory:
Expand Down Expand Up @@ -156,7 +156,6 @@ def _augment_fourier_pandas(
) -> pd.DataFrame:

df = data.copy()
# df.sort_values(by=[date_column], inplace=True)

scale_factor = date_to_seq_scale_factor(df, date_column).iloc[0].total_seconds()
if scale_factor == 0:
Expand Down
2 changes: 1 addition & 1 deletion src/pytimetk/feature_engineering/hilbert.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def apply_hilbert(pl_df):

# Convert the GroupBy object into a Polars DataFrame
df_pl = (
pl.from_pandas(data.apply(lambda x: x))
pl.from_pandas(data.obj.copy())
.group_by(groups, maintain_order=True)
.agg(pl.all().sort_by(date_column))
)
Expand Down
4 changes: 2 additions & 2 deletions src/pytimetk/feature_engineering/holiday_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def _augment_holiday_signature_pandas(

if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
# Data is a GroupBy object, get the underlying DataFrame
pandas_df = data.apply(lambda x: x)
pandas_df = data.obj.copy()
elif isinstance(data, pd.DataFrame):
# Data is already a DataFrame
pandas_df = data
Expand Down Expand Up @@ -306,7 +306,7 @@ def _augment_holiday_signature_polars(

if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
# Data is a GroupBy object, get the underlying DataFrame
pandas_df = data.apply(lambda x: x)
pandas_df = data.obj.copy()
elif isinstance(data, pd.DataFrame):
# Data is already a DataFrame
pandas_df = data
Expand Down
6 changes: 1 addition & 5 deletions src/pytimetk/feature_engineering/lags.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,6 @@ def _augment_lags_pandas(

df = data.copy()

df.sort_values(by=[date_column], inplace=True)

for col in value_column:
for lag in lags:
df[f'{col}_lag_{lag}'] = df[col].shift(lag)
Expand All @@ -177,8 +175,6 @@ def _augment_lags_pandas(

df = data.copy()

df.sort_values(by=[*group_names, date_column], inplace=True)

for col in value_column:
for lag in lags:
df[f'{col}_lag_{lag}'] = df.groupby(group_names)[col].shift(lag)
Expand All @@ -194,7 +190,7 @@ def _augment_lags_polars(

if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
# Data is a GroupBy object, get the underlying DataFrame
pandas_df = data.apply(lambda x: x)
pandas_df = data.obj
elif isinstance(data, pd.DataFrame):
# Data is already a DataFrame
pandas_df = data
Expand Down
2 changes: 1 addition & 1 deletion src/pytimetk/feature_engineering/leads.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def _augment_leads_polars(
) -> pl.DataFrame:
if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
# Data is a GroupBy object, get the underlying DataFrame
pandas_df = data.apply(lambda x: x)
pandas_df = data.obj.copy()
elif isinstance(data, pd.DataFrame):
# Data is already a DataFrame
pandas_df = data
Expand Down
12 changes: 5 additions & 7 deletions src/pytimetk/feature_engineering/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def _augment_rolling_pandas(
# Group data if it's a GroupBy object; otherwise, prepare it for the rolling calculations
if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
group_names = data.grouper.names
grouped = data_copy.sort_values(by=[*group_names, date_column]).groupby(group_names)
grouped = data_copy.groupby(group_names)

# Check if the data is grouped and threads are set to 1. If true, handle it without parallel processing.
if threads == 1:
Expand Down Expand Up @@ -408,8 +408,6 @@ def _augment_rolling_polars(
**kwargs,
) -> pd.DataFrame:

# Create a fresh copy of the data, leaving the original untouched
data_copy = data.copy() if isinstance(data, pd.DataFrame) else data.obj.copy()

# Retrieve the group column names if the input data is a GroupBy object
if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
Expand All @@ -418,10 +416,10 @@ def _augment_rolling_polars(
group_names = None

# Convert data into a Pandas DataFrame format for processing
if isinstance(data_copy, pd.core.groupby.generic.DataFrameGroupBy):
pandas_df = data_copy.apply(lambda x: x)
elif isinstance(data_copy, pd.DataFrame):
pandas_df = data_copy
if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
pandas_df = data.obj.copy()
elif isinstance(data, pd.DataFrame):
pandas_df = data.copy()
else:
raise ValueError("Data must be a Pandas DataFrame or Pandas GroupBy object.")

Expand Down
3 changes: 0 additions & 3 deletions src/pytimetk/finance/cmo.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,6 @@ def _calculate_cmo_polars(series: pl.Series, period=14):
delta = series.diff()

# Separate gains and losses
# gains = delta.apply(lambda x: x if x > 0 else 0)
# losses = delta.apply(lambda x: -x if x < 0 else 0)

gains = pl.when(delta > 0).then(delta).otherwise(0)
losses = pl.when(delta <= 0).then(-delta).otherwise(0)

Expand Down
2 changes: 1 addition & 1 deletion src/pytimetk/finance/rsi.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def _augment_rsi_polars(

if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
# Data is a GroupBy object, get the underlying DataFrame
pandas_df = data.apply(lambda x: x)
pandas_df = data.obj.copy()
elif isinstance(data, pd.DataFrame):
# Data is already a DataFrame
pandas_df = data.copy()
Expand Down
1 change: 1 addition & 0 deletions tests/test_get_frequency_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
('Y', False, 'A-DEC' , '365 days', 1.0, 'Y'),
('Y', True, 'Y' , '365 days', 1.0, 'Y')
])

def test_correct_frequency_inference(
freq, regular, inferred_unit, median_timedelta, median_scale, median_unit
):
Expand Down

0 comments on commit 819a5f9

Please sign in to comment.