Skip to main content

Python NumPy: How to Calculate Averages of Consecutive Subarrays in NumPy

Averaging consecutive groups of elements is essential for downsampling signals, reducing data resolution, and creating binned statistics. NumPy's reshape method provides an efficient, vectorized solution without Python loops.

The Reshape Trick

Transform the 1D array into a 2D matrix where each row contains k consecutive elements, then compute the mean of each row:

import numpy as np

arr = np.array([10, 20, 30, 40, 50, 60])
k = 3

# Fold the 1D array into a (n_groups, k) matrix, then take each row's mean.
groups = arr.reshape(-1, k)
result = groups.mean(axis=1)

print(result)  # [20. 50.]

Step-by-Step Breakdown

import numpy as np

arr = np.array([10, 20, 30, 40, 50, 60])
k = 3

# Step 1: fold the flat array into rows of k consecutive elements.
reshaped = arr.reshape(-1, k)
print("Reshaped:")
print(reshaped)
# [[10 20 30]
#  [40 50 60]]

# Step 2: reduce along axis=1, i.e. across each row.
averages = np.mean(reshaped, axis=1)
print(f"\nAverages: {averages}")  # [20. 50.]

# Choosing the axis:
#   axis=0 collapses rows -> column means [25. 35. 45.]
#   axis=1 collapses cols -> row means    [20. 50.]

Output:

Reshaped:
[[10 20 30]
[40 50 60]]

Averages: [20. 50.]
tip

The -1 in reshape(-1, k) tells NumPy to calculate the number of rows automatically based on the array length and specified column count.

Handling Non-Divisible Arrays

When the array length isn't evenly divisible by k, you need a strategy for the remaining elements.

Option 1: Trim Excess Elements

Discard elements that don't form a complete group:

import numpy as np

arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])
k = 3

# How many trailing elements fail to fill a complete group of k?
remainder = len(arr) % k

# Keep only the leading complete groups; leave the array untouched otherwise.
arr_trimmed = arr[:len(arr) - remainder] if remainder else arr

result = arr_trimmed.reshape(-1, k).mean(axis=1)
print(f"Trimmed array: {arr_trimmed}")  # Trimmed array: [1 2 3 4 5 6]
print(f"Averages: {result}")            # Averages: [2. 5.]
# Elements 7, 8 are excluded

Option 2: Pad with NaN and Use nanmean

Include all elements by padding with NaN:

import numpy as np

arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])
k = 3

# Slots needed to complete the final group (0 when already divisible).
pad_size = -len(arr) % k
if pad_size:
    arr_padded = np.pad(arr.astype(float), (0, pad_size),
                        constant_values=np.nan)
else:
    arr_padded = arr.astype(float)

# nanmean averages only the finite entries in each row, skipping the padding.
result = np.nanmean(arr_padded.reshape(-1, k), axis=1)

print(f"Padded: {arr_padded}")  # Padded: [ 1.  2.  3.  4.  5.  6.  7.  8. nan]
print(f"Averages: {result}")    # Averages: [2.  5.  7.5]
# Last group [7, 8, nan] averages to 7.5

Option 3: Pad with Zeros

When zeros are meaningful in your context:

import numpy as np

arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])
k = 3

# Slots needed to complete the final group (0 when already divisible).
pad_size = -len(arr) % k
arr_padded = np.pad(arr, (0, pad_size), constant_values=0) if pad_size else arr

result = arr_padded.reshape(-1, k).mean(axis=1)

print(f"Padded: {arr_padded}")  # Padded: [1 2 3 4 5 6 7 8 0]
print(f"Averages: {result}")    # Averages: [2. 5. 5.]
# Last group [7, 8, 0] averages to 5.0

Handling Missing Data (NaN Values)

When your array already contains NaN values, use np.nanmean():

import numpy as np

arr = np.array([1.0, 2.0, np.nan, 4.0, 5.0, 6.0])
k = 3

# nanmean computes each row's mean over the finite entries only.
groups = arr.reshape(-1, k)
result = np.nanmean(groups, axis=1)

print(result)  # [1.5 5. ]

# First group [1, 2, nan]  -> mean of [1, 2] = 1.5
# Second group [4, 5, 6]   -> mean = 5.0

Reusable Function

Wrap the logic in a flexible function:

import numpy as np

def average_consecutive(arr, k, handle_remainder='trim'):
    """
    Average every k consecutive elements of a 1D array.

    Parameters:
        arr: Input array-like of numbers.
        k: Group size; must be a positive integer.
        handle_remainder: Strategy for trailing elements when len(arr)
            is not evenly divisible by k:
            'trim'     - drop the incomplete trailing group,
            'pad_nan'  - pad with NaN and average with nanmean,
            'pad_zero' - pad with zeros (zeros count toward the mean).

    Returns:
        1D float array of group averages.

    Raises:
        ValueError: If k is not positive, or if the remainder must be
            handled and handle_remainder is not a known strategy.
    """
    # Guard against k <= 0, which would otherwise surface as a confusing
    # ZeroDivisionError from the modulo below.
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    arr = np.asarray(arr, dtype=float)
    remainder = len(arr) % k

    if remainder == 0:
        # Evenly divisible: no strategy needed.
        return arr.reshape(-1, k).mean(axis=1)

    if handle_remainder == 'trim':
        # Drop the incomplete trailing group.
        return arr[:-remainder].reshape(-1, k).mean(axis=1)

    if handle_remainder == 'pad_nan':
        # Complete the last group with NaN; nanmean ignores the padding.
        padded = np.pad(arr, (0, k - remainder), constant_values=np.nan)
        return np.nanmean(padded.reshape(-1, k), axis=1)

    if handle_remainder == 'pad_zero':
        # Complete the last group with zeros, which dilute the last mean.
        padded = np.pad(arr, (0, k - remainder), constant_values=0)
        return padded.reshape(-1, k).mean(axis=1)

    raise ValueError(f"Unknown remainder handling: {handle_remainder}")

# Usage
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])

print(average_consecutive(arr, 3, 'trim'))      # [2. 5.]
print(average_consecutive(arr, 3, 'pad_nan'))   # [2. 5. 7.5]
print(average_consecutive(arr, 3, 'pad_zero'))  # [2. 5. 5.]

Other Aggregations

The same reshape pattern works with other reduction operations:

import numpy as np

arr = np.array([10, 20, 30, 40, 50, 60])
k = 3

reshaped = arr.reshape(-1, k)

# Any row-wise reduction works on the reshaped matrix, not just the mean.
sums = np.sum(reshaped, axis=1)
print(f"Sums: {sums}")        # [60 150]

maxes = np.max(reshaped, axis=1)
print(f"Maxes: {maxes}")      # [30 60]

mins = np.min(reshaped, axis=1)
print(f"Mins: {mins}")        # [10 40]

stds = np.std(reshaped, axis=1)
print(f"Stds: {stds}")        # [8.16496581 8.16496581]

medians = np.median(reshaped, axis=1)
print(f"Medians: {medians}")  # [20. 50.]

Output:

Sums: [ 60 150]
Maxes: [30 60]
Mins: [10 40]
Stds: [8.16496581 8.16496581]
Medians: [20. 50.]

2D Arrays: Average Consecutive Rows

Extend the concept to average groups of rows in a matrix:

import numpy as np

# 6 rows, 4 columns
matrix = np.arange(24).reshape(6, 4)
print("Original:")
print(matrix)

k = 2  # Average every 2 rows

# Insert a grouping axis of length k, then collapse it with the mean:
# (6, 4) -> (3, 2, 4) -> (3, 4)
n_cols = matrix.shape[1]
result = matrix.reshape(-1, k, n_cols).mean(axis=1)
print("\nAveraged (every 2 rows):")
print(result)

Output:

Original:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]

Averaged (every 2 rows):
[[ 2. 3. 4. 5.]
[10. 11. 12. 13.]
[18. 19. 20. 21.]]

Practical Example: Signal Downsampling

import numpy as np

# Simulated high-frequency signal (1000 Hz)
np.random.seed(42)
time = np.linspace(0, 1, 1000)
noise = np.random.normal(0, 0.1, 1000)
signal = np.sin(2 * np.pi * 5 * time) + noise

# Downsample to 100 Hz: each output sample is the mean of 10 inputs.
k = 10
downsampled = signal.reshape(-1, k).mean(axis=1)
downsampled_time = time[::k]

print(f"Original: {len(signal)} samples")          # 1000
print(f"Downsampled: {len(downsampled)} samples")  # 100

Performance Comparison

The reshape method significantly outperforms Python loops:

import numpy as np
import timeit

# 1M samples averaged in groups of 100 -> 10,000 means per call.
arr = np.random.rand(1_000_000)
k = 100

# NumPy reshape method
def numpy_method():
    # Single vectorized pass over the whole array.
    return arr.reshape(-1, k).mean(axis=1)

# Python loop method
def loop_method():
    # One interpreted slice + .mean() call per group: heavy per-iteration overhead.
    return np.array([arr[i:i+k].mean() for i in range(0, len(arr), k)])

# Each function runs 100 times; timeit reports the total elapsed seconds.
# NOTE(review): the loop variant takes several seconds — this is a benchmark,
# not something to run in a test suite.
numpy_time = timeit.timeit(numpy_method, number=100)
loop_time = timeit.timeit(loop_method, number=100)

print(f"NumPy reshape: {numpy_time:.4f}s")
print(f"Python loop: {loop_time:.4f}s")
print(f"Speedup: {loop_time/numpy_time:.1f}x")

Output:

NumPy reshape: 0.1483s
Python loop: 12.2428s
Speedup: 82.5x
note

The reshape method is often one to two orders of magnitude faster than a Python loop (82.5x in the run above; the exact factor depends on array size, group size, and hardware) because it operates entirely in compiled C code without Python interpreter overhead.

Summary

| Step | Code | Purpose |
| --- | --- | --- |
| Group elements | `arr.reshape(-1, k)` | Create rows of k elements |
| Calculate average | `.mean(axis=1)` | Average each group |
| Handle remainder | Trim or pad | Deal with non-divisible lengths |
| Handle NaN | `np.nanmean()` | Ignore missing values |

Use reshape(-1, k).mean(axis=1) for efficient, vectorized averaging of consecutive subarrays. This pattern extends naturally to other aggregations like sum(), max(), min(), and std(), making it a versatile tool for signal processing and data downsampling.