Skip to main content

Python NumPy: How to Sum Columns in a 2D NumPy Array in Python

Summing along columns is a fundamental operation in data analysis. NumPy's axis parameter controls the direction of the operation, with axis=0 summing down each column.

Basic Column Sum

Use np.sum() with axis=0 to sum elements vertically:

import numpy as np

# Two rows, three columns.
matrix = np.array([[1, 2, 3],
                   [4, 5, 6]])

# axis=0 collapses the rows, producing one sum per column.
col_sums = matrix.sum(axis=0)
print(col_sums)

Output:

[5 7 9]

Visualizing the Operation

import numpy as np

# A 3x3 example, so each column sum combines three values.
matrix = np.array([[1, 2, 3],   # Row 0
                   [4, 5, 6],   # Row 1
                   [7, 8, 9]])  # Row 2

# ↓ ↓ ↓ axis=0 sums DOWN columns
#   [1, 2, 3]
# + [4, 5, 6]
# + [7, 8, 9]
# = [12, 15, 18]
col_sums = matrix.sum(axis=0)
print(col_sums)

Output:

[12 15 18]

Method vs Function Syntax

Both approaches produce identical results:

import numpy as np

# Values 1..6 reshaped into 2 rows of 3 columns.
matrix = np.arange(1, 7).reshape(2, 3)

# Function syntax: pass the array to np.sum.
result1 = np.sum(matrix, axis=0)

# Method syntax: call .sum on the array itself.
result2 = matrix.sum(axis=0)

print(result1) # [5 7 9]
print(result2) # [5 7 9]

Output:

[5 7 9]
[5 7 9]

Understanding Axis Parameter

The axis parameter specifies which dimension to collapse:

import numpy as np

# The same 2x3 matrix, built from a range of values 1..6.
matrix = np.arange(1, 7).reshape(2, 3)

print(f"Shape: {matrix.shape}") # (2, 3) (2 rows, 3 columns)

# axis=0 removes the row dimension: one sum per column, shape (3,).
col_sums = np.sum(matrix, axis=0)
print(f"Column sums: {col_sums}") # [5 7 9]

# axis=1 removes the column dimension: one sum per row, shape (2,).
row_sums = np.sum(matrix, axis=1)
print(f"Row sums: {row_sums}") # [6 15]

# Omitting axis reduces over every element and returns a scalar.
total = np.sum(matrix)
print(f"Total: {total}") # 21

Output:

Shape: (2, 3)
Column sums: [5 7 9]
Row sums: [ 6 15]
Total: 21

Axis Quick Reference

| Goal        | Axis   | Result Shape | Example     |
|-------------|--------|--------------|-------------|
| Sum columns | `0`    | `(n_cols,)`  | `[5, 7, 9]` |
| Sum rows    | `1`    | `(n_rows,)`  | `[6, 15]`   |
| Sum all     | `None` | scalar       | `21`        |
> **Tip:** Think of axis=0 as "sum across axis 0 (rows)", which gives one value per column. The axis you specify is the one that disappears in the output.

Keeping Dimensions with keepdims

Preserve the original number of dimensions for broadcasting:

import numpy as np

matrix = np.arange(1, 7).reshape(2, 3)

# Default reduction drops the summed axis: shape (3,).
col_sums = matrix.sum(axis=0)
print(f"Shape: {col_sums.shape}") # (3,)

# keepdims=True leaves a size-1 axis in its place: shape (1, 3).
col_sums_kept = matrix.sum(axis=0, keepdims=True)
print(f"Shape with keepdims: {col_sums_kept.shape}") # (1, 3)
print(col_sums_kept) # [[5 7 9]]

# The (1, 3) result broadcasts cleanly against the (2, 3) matrix,
# which makes per-column normalization a one-liner.
normalized = matrix / matrix.sum(axis=0, keepdims=True)
print(normalized)
# [[0.2        0.28571429 0.33333333]
#  [0.8        0.71428571 0.66666667]]

Output:

Shape: (3,)
Shape with keepdims: (1, 3)
[[5 7 9]]
[[0.2        0.28571429 0.33333333]
 [0.8        0.71428571 0.66666667]]

Other Column Operations

The same axis pattern works with other aggregation functions:

import numpy as np

matrix = np.array([[1, 5, 3],
                   [4, 2, 6],
                   [7, 8, 9]])

# Every reducing method takes the same axis argument, so the
# column-wise pattern carries over unchanged.
print(f"Mean: {matrix.mean(axis=0)}") # [4. 5. 6.]
print(f"Max: {matrix.max(axis=0)}") # [7 8 9]
print(f"Min: {matrix.min(axis=0)}") # [1 2 3]
print(f"Std: {matrix.std(axis=0)}") # [2.449 2.449 2.449]
print(f"Product: {matrix.prod(axis=0)}") # [28 80 162]

Output:

Mean: [4. 5. 6.]
Max: [7 8 9]
Min: [1 2 3]
Std: [2.44948974 2.44948974 2.44948974]
Product: [ 28  80 162]

Performance: Vectorized vs Loops

NumPy's vectorized operations are dramatically faster:

import numpy as np
import timeit

# Create a large matrix so the timing difference is clearly visible.
matrix = np.random.rand(1000, 1000)

# ❌ Slow: pure-Python loops visit every element one at a time.
def loop_sum():
    """Sum each column with nested Python-level iteration."""
    rows, cols = matrix.shape
    result = []
    for c in range(cols):
        total = sum(matrix[r, c] for r in range(rows))
        result.append(total)
    return result

# ✅ Fast: one vectorized call runs the loop in compiled C code.
def numpy_sum():
    """Sum each column with a single vectorized NumPy call."""
    return np.sum(matrix, axis=0)

loop_time = timeit.timeit(loop_sum, number=10)
numpy_time = timeit.timeit(numpy_sum, number=10)

print(f"Loop: {loop_time:.4f}s")
print(f"NumPy: {numpy_time:.4f}s")
print(f"Speedup: {loop_time/numpy_time:.0f}x")

Typical output:

Loop: 2.0510s
NumPy: 0.0032s
Speedup: 633x

Practical Examples

Normalizing Columns

import numpy as np

data = np.array([[100, 50, 200],
                 [150, 75, 300],
                 [200, 100, 400]])

# Dividing by the per-column totals rescales each column to sum to 1.
# The (3,) sums vector broadcasts across the rows of the (3, 3) array.
col_sums = data.sum(axis=0)
normalized = data / col_sums

print("Normalized columns:")
print(normalized)
print(f"\nColumn sums verify: {normalized.sum(axis=0)}") # [1. 1. 1.]

Output:

Normalized columns:
[[0.22222222 0.22222222 0.22222222]
 [0.33333333 0.33333333 0.33333333]
 [0.44444444 0.44444444 0.44444444]]

Column sums verify: [1. 1. 1.]

Finding Column with Maximum Sum

import numpy as np

# One row per store, one column per product.
sales = np.array([[100, 150, 200],   # Store 1
                  [120, 140, 180],   # Store 2
                  [90, 160, 220]])   # Store 3
products = ['A', 'B', 'C']

# Total each product across stores, then pick the column with the
# largest total.
col_sums = sales.sum(axis=0)
best_product_idx = int(np.argmax(col_sums))

print(f"Total sales per product: {col_sums}")
print(f"Best selling product: {products[best_product_idx]}")

Output:

Total sales per product: [310 450 600]
Best selling product: C

Cumulative Column Sum

import numpy as np

# Values 1..9 arranged into a 3x3 grid.
matrix = np.arange(1, 10).reshape(3, 3)

# cumsum along axis=0 keeps a running total moving down each column.
cumsum = matrix.cumsum(axis=0)
print(cumsum)

Output:

[[ 1  2  3]
 [ 5  7  9]
 [12 15 18]]

3D Arrays

The same principle extends to higher dimensions:

import numpy as np

# 3D array: 2 layers, 3 rows, 4 columns
arr_3d = np.arange(24).reshape(2, 3, 4)

# Reducing over an axis removes exactly that axis from the shape.
layer_sums = arr_3d.sum(axis=0)   # across layers
inner_row_sums = arr_3d.sum(axis=1)   # across rows within each layer
inner_col_sums = arr_3d.sum(axis=2)   # across columns within each layer

print(f"axis=0 shape: {layer_sums.shape}") # (3, 4)
print(f"axis=1 shape: {inner_row_sums.shape}") # (2, 4)
print(f"axis=2 shape: {inner_col_sums.shape}") # (2, 3)

Output:

axis=0 shape: (3, 4)
axis=1 shape: (2, 4)
axis=2 shape: (2, 3)

Summary

| Operation       | Code                     | Result               |
|-----------------|--------------------------|----------------------|
| Sum columns     | `np.sum(matrix, axis=0)` | One value per column |
| Sum rows        | `np.sum(matrix, axis=1)` | One value per row    |
| Sum all         | `np.sum(matrix)`         | Single scalar        |
| Keep dimensions | `axis=0, keepdims=True`  | Preserves 2D shape   |

Use np.sum(matrix, axis=0) or matrix.sum(axis=0) for column sums. The vectorized operation is orders of magnitude faster than Python loops and produces cleaner, more readable code.