Skip to main content

Python NumPy: How to Average Every N Elements in a NumPy Array

Grouping array elements and calculating means for each group is a common operation in signal processing, data downsampling, and time series analysis. NumPy's reshape method provides an efficient, loop-free solution.

The Reshape Trick

Transform the 1D array into a 2D matrix with N columns, then calculate the mean across each row:

import numpy as np

# Six samples that will be averaged in consecutive groups of n.
n = 3
arr = np.array([10, 20, 30, 40, 50, 60])

# View the data as rows of n values; -1 lets NumPy infer the row count (2 here).
groups = np.reshape(arr, (-1, n))

# Collapsing axis 1 leaves one mean per row, i.e. one per group.
averages = np.mean(groups, axis=1)

print(averages)  # [20. 50.]

Step-by-Step Breakdown

import numpy as np

n = 3
arr = np.array([10, 20, 30, 40, 50, 60])

# Step 1: lay the values out as rows of n (row count inferred via -1)
reshaped = np.reshape(arr, (-1, n))
print("Reshaped:")
print(reshaped)
# [[10 20 30]
#  [40 50 60]]

# Step 2: reduce along axis=1 so that every row collapses to its mean
averages = np.mean(reshaped, axis=1)
print(f"\nAverages: {averages}")  # [20. 50.]

# What each axis does on the (2, 3) matrix:
#   axis=0 -> one mean per column (down the rows):    [25. 35. 45.]
#   axis=1 -> one mean per row (across the columns):  [20. 50.]
Tip

The -1 in reshape(-1, n) tells NumPy to calculate the number of rows automatically from the array length and the specified column count. This only works when the array length is a multiple of n; otherwise reshape raises a ValueError.

Handling Non-Divisible Arrays

When the array length isn't evenly divisible by N, you must decide how to handle the remainder.

Option 1: Trim Excess Elements

Discard elements that don't form a complete group:

import numpy as np

n = 3
arr = np.array([1, 2, 3, 4, 5, 6, 7])  # 7 elements

# Split the length into full groups plus whatever is left over
num_complete_groups, leftover = divmod(len(arr), n)

# Keep only the 6 elements that fill 2 complete groups of 3
trimmed = arr[:len(arr) - leftover]
result = trimmed.reshape(num_complete_groups, n).mean(axis=1)

print(f"Trimmed array: {trimmed}")  # [1 2 3 4 5 6]
print(f"Averages: {result}")  # [2. 5.]
# NOTE: the trailing element 7 does not contribute to any average!

Option 2: Pad with NaN

Include remaining elements by padding with NaN and using nanmean:

import numpy as np

arr = np.array([1, 2, 3, 4, 5, 6, 7])
n = 3

# Number of slots needed to reach the next multiple of n.
# This is 0 when the length is already divisible, so no if/else is required.
pad_size = (-len(arr)) % n

# NaN only exists for floating-point dtypes, so convert before padding.
arr_padded = np.pad(arr.astype(float), (0, pad_size), constant_values=np.nan)

# Reshape and use nanmean to ignore NaN values, so the last group is
# averaged over its real elements only.
result = np.nanmean(arr_padded.reshape(-1, n), axis=1)

print(f"Padded array: {arr_padded}")  # Padded array: [ 1.  2.  3.  4.  5.  6.  7. nan nan]
print(f"Averages: {result}")  # Averages: [2. 5. 7.]
# Last group [7, nan, nan] averages to 7.0

Option 3: Pad with Zeros

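Use this only if zeros are meaningful in your context, because the padded zeros are counted in the last group's mean and pull it toward zero: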

import numpy as np

arr = np.array([1, 2, 3, 4, 5, 6, 7])
n = 3

# Number of zeros needed to reach the next multiple of n.
# This is 0 when the length is already divisible, so no if/else is required.
pad_size = (-len(arr)) % n
arr_padded = np.pad(arr, (0, pad_size), constant_values=0)

# The padded zeros are ordinary values here: they count toward the last mean.
result = arr_padded.reshape(-1, n).mean(axis=1)

print(f"Padded array: {arr_padded}")  # Padded array: [1 2 3 4 5 6 7 0 0]
print(f"Averages: {result}")  # Averages: [2.         5.         2.33333333]
# Last group [7, 0, 0] averages to ~2.33

Reusable Function

Wrap the logic in a flexible function:

import numpy as np

def average_every_n(arr, n, handle_remainder='trim'):
    """
    Average every n consecutive elements.

    Parameters:
        arr: Input array, or any sequence accepted by np.asarray. The
            grouping logic is written for 1-D data.
        n: Group size; must be a positive integer.
        handle_remainder: How to treat trailing elements when len(arr) is
            not a multiple of n:
            'trim'     - drop the incomplete last group
            'pad_nan'  - pad with NaN and average only the real values
            'pad_zero' - pad with zeros, which count toward the last mean

    Returns:
        Array holding one mean per group.

    Raises:
        ValueError: If n is not positive, or if a remainder has to be
            handled and handle_remainder is not one of the options above.
    """
    # Reject bad group sizes up front instead of failing later with an
    # unrelated ZeroDivisionError or reshape error.
    if n <= 0:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    arr = np.asarray(arr)
    remainder = len(arr) % n

    # Evenly divisible: every strategy gives the same answer.
    if remainder == 0:
        return arr.reshape(-1, n).mean(axis=1)

    if handle_remainder == 'trim':
        trim_length = len(arr) - remainder
        return arr[:trim_length].reshape(-1, n).mean(axis=1)

    pad_size = n - remainder

    if handle_remainder == 'pad_nan':
        # NaN requires a float dtype; nanmean then skips the padding.
        padded = np.pad(arr.astype(float), (0, pad_size),
                        constant_values=np.nan)
        return np.nanmean(padded.reshape(-1, n), axis=1)

    if handle_remainder == 'pad_zero':
        padded = np.pad(arr, (0, pad_size), constant_values=0)
        return padded.reshape(-1, n).mean(axis=1)

    # Previously an unknown mode fell through and silently returned None.
    raise ValueError(
        "handle_remainder must be 'trim', 'pad_nan', or 'pad_zero', "
        f"got {handle_remainder!r}"
    )

# Usage: run the same 7-element input through each remainder strategy
arr = np.array([1, 2, 3, 4, 5, 6, 7])

# Printed output, in order:
#   'trim'     -> [2. 5.]
#   'pad_nan'  -> [2. 5. 7.]
#   'pad_zero' -> [2.         5.         2.33333333]
for mode in ('trim', 'pad_nan', 'pad_zero'):
    print(average_every_n(arr, 3, mode))

Other Aggregations

The same reshape pattern works with other operations:

import numpy as np

n = 3
arr = np.array([10, 20, 30, 40, 50, 60])

# One row per group of n elements; every reduction below runs along the
# last axis, i.e. within each row.
reshaped = arr.reshape(len(arr) // n, n)

# Sum of each group
sums = reshaped.sum(axis=-1)
print(f"Sums: {sums}")  # [ 60 150]

# Largest value in each group
maxes = reshaped.max(axis=-1)
print(f"Maxes: {maxes}")  # [30 60]

# Smallest value in each group
mins = reshaped.min(axis=-1)
print(f"Mins: {mins}")  # [10 40]

# Standard deviation of each group
stds = reshaped.std(axis=-1)
print(f"Stds: {stds}")  # [8.16496581 8.16496581]

Output:

Sums: [ 60 150]
Maxes: [30 60]
Mins: [10 40]
Stds: [8.16496581 8.16496581]

2D Array: Average Every N Rows

Extend the concept to average groups of rows:

import numpy as np

# Input: 6 rows by 3 columns
matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
    [13, 14, 15],
    [16, 17, 18],
])

n = 2  # number of consecutive rows merged into each average

# Stack the rows into blocks of n: (6, 3) -> (3, 2, 3).
# Averaging over axis=1 then collapses each block into a single row.
num_rows, num_cols = matrix.shape
blocks = matrix.reshape(num_rows // n, n, num_cols)
result = blocks.mean(axis=1)

print(result)

Output:

[[ 2.5  3.5  4.5]
 [ 8.5  9.5 10.5]
 [14.5 15.5 16.5]]

Practical Example: Signal Downsampling

import numpy as np

# Simulated high-frequency signal (1000 Hz): two sine periods plus
# Gaussian noise, seeded so that the example is reproducible
np.random.seed(42)
clean = np.sin(np.linspace(0, 4*np.pi, 1000))
noise = np.random.normal(0, 0.1, 1000)
signal = clean + noise

# Averaging each run of 10 consecutive samples brings the rate down to 100 Hz
downsampling_factor = 10
bins = signal.reshape(-1, downsampling_factor)
downsampled = bins.mean(axis=1)

print(f"Original length: {len(signal)}")  # 1000
print(f"Downsampled length: {len(downsampled)}")  # 100

Output:

Original length: 1000
Downsampled length: 100

Performance Comparison

The reshape method is significantly faster than Python loops:

import numpy as np
import timeit

arr = np.random.rand(1_000_000)
n = 100


# NumPy reshape method
def numpy_method():
    """Average every n elements of arr with one vectorized reshape + mean."""
    return arr.reshape(-1, n).mean(axis=1)


# Python loop method
def loop_method():
    """Average every n elements of arr, slicing one group per loop iteration."""
    return np.array([arr[i:i+n].mean() for i in range(0, len(arr), n)])


# Run the benchmark only when executed as a script (or pasted into a REPL or
# notebook), so that importing this code does not trigger several seconds
# of timing work.
if __name__ == "__main__":
    numpy_time = timeit.timeit(numpy_method, number=100)
    loop_time = timeit.timeit(loop_method, number=100)

    print(f"NumPy reshape: {numpy_time:.4f}s")
    print(f"Python loop: {loop_time:.4f}s")
    print(f"Speedup: {loop_time/numpy_time:.1f}x")

Output:

NumPy reshape: 0.0469s
Python loop: 4.3649s
Speedup: 93.1x
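Note

The reshape method is typically one to two orders of magnitude faster than an equivalent Python loop (about 93x in the run above) because the grouping and averaging run entirely in compiled C code without per-iteration Python overhead. The exact speedup depends on the array size, the group size, and your hardware.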

Summary

| Step | Operation | Purpose |
|------|-----------|---------|
| 1 | arr.reshape(-1, n) | Group into n-element rows |
| 2 | .mean(axis=1) | Average each group |
| 3 | Trim or pad (if needed) | Handle non-divisible lengths |

Use reshape(-1, n).mean(axis=1) for fast, vectorized binned averages. This pattern extends to other aggregations like sum(), max(), min(), and std(), making it a versatile tool for data downsampling and grouped statistics.