Skip to main content

Python NumPy: How to Check and Define NumPy Array Data Types in Python

Every NumPy array has a dtype attribute that determines how data is stored in memory. Understanding and controlling data types is essential for memory optimization, numerical precision, and compatibility with external libraries.

Checking Data Types with .dtype

The .dtype attribute reveals an array's data type:

import numpy as np

# Integer array
int_arr = np.array([1, 2, 3])
print(int_arr.dtype) # int64 (int32 on Windows with NumPy < 2.0)

# Float array
float_arr = np.array([1.0, 2.0, 3.0])
print(float_arr.dtype) # float64

# Mixed int/float input is promoted to the most general type
mixed = np.array([1, 2.5, 3])
print(mixed.dtype) # float64

# String array
strings = np.array(['apple', 'banana'])
print(strings.dtype) # <U6 (Unicode, 6 chars max)

Output:

int64
float64
float64
<U6

Detailed Type Information

import numpy as np

arr = np.array([1.0, 2.0, 3.0])

print(f"dtype: {arr.dtype}") # float64
print(f"dtype name: {arr.dtype.name}") # 'float64'
print(f"item size: {arr.dtype.itemsize} bytes") # 8
print(f"total bytes: {arr.nbytes}") # 24 (3 elements × 8 bytes)

Output:

dtype: float64
dtype name: float64
item size: 8 bytes
total bytes: 24

Defining Type at Creation

Specify dtype when creating arrays to control storage:

import numpy as np

# Explicit float32 (uses half the memory of float64)
arr = np.array([1, 2, 3], dtype='float32')
print(arr.dtype) # float32
print(arr) # [1. 2. 3.]

# Explicit integer type
arr = np.array([1.9, 2.1, 3.5], dtype='int32')
print(arr) # [1 2 3] (truncated!)

# Boolean
arr = np.array([0, 1, 2, 0], dtype='bool')
print(arr) # [False True True False]

Output:

float32
[1. 2. 3.]
[1 2 3]
[False True True False]

With Array Creation Functions

import numpy as np

# zeros with specific type
zeros = np.zeros((3, 3), dtype='int8')
print(zeros.dtype) # int8

# ones with specific type
ones = np.ones((2, 2), dtype='float32')
print(ones.dtype) # float32

# arange with specific type
sequence = np.arange(10, dtype='float16')
print(sequence.dtype) # float16

# linspace with specific type
linear = np.linspace(0, 1, 5, dtype='float32')
print(linear.dtype) # float32

Output:

int8
float32
float16
float32

Type Specification Formats

NumPy accepts multiple formats for specifying types:

import numpy as np

# All equivalent ways to specify float64
np.array([1, 2, 3], dtype='float64')
np.array([1, 2, 3], dtype=np.float64)
np.array([1, 2, 3], dtype=float)
np.array([1, 2, 3], dtype='f8') # shorthand

Type Shortcuts Reference

SymbolMeaningExamples
iSigned integeri1, i2, i4, i8
uUnsigned integeru1, u2, u4, u8
fFloating pointf2, f4, f8
cComplexc8, c16
bBooleanb1, ? or bool (a bare b is int8, not Boolean)
SByte stringS10 (10 bytes)
UUnicode stringU10 (10 chars)

The number indicates bytes: i4 = 4-byte (32-bit) integer, f8 = 8-byte (64-bit) float. For string types the number is a length instead: S10 holds 10 bytes and U10 holds 10 characters.

import numpy as np

# Using shorthand notation
arr_i4 = np.array([1, 2, 3], dtype='i4') # int32
arr_f4 = np.array([1, 2, 3], dtype='f4') # float32
arr_u1 = np.array([1, 2, 3], dtype='u1') # uint8

print(f"i4: {arr_i4.dtype}") # i4: int32
print(f"f4: {arr_f4.dtype}") # f4: float32
print(f"u1: {arr_u1.dtype}") # u1: uint8

Numeric Type Ranges

Understanding type limits prevents overflow:

import numpy as np

# Integer type limits
print(f"int8: {np.iinfo(np.int8).min} to {np.iinfo(np.int8).max}")
# int8: -128 to 127

print(f"int16: {np.iinfo(np.int16).min} to {np.iinfo(np.int16).max}")
# int16: -32768 to 32767

print(f"uint8: {np.iinfo(np.uint8).min} to {np.iinfo(np.uint8).max}")
# uint8: 0 to 255

# Float type precision
print(f"float32 precision: {np.finfo(np.float32).precision} digits")
# float32 precision: 6 digits

print(f"float64 precision: {np.finfo(np.float64).precision} digits")
# float64 precision: 15 digits

Output:

int8: -128 to 127
int16: -32768 to 32767
uint8: 0 to 255
float32 precision: 6 digits
float64 precision: 15 digits

String Types: S vs U

NumPy supports both byte strings and Unicode strings:

import numpy as np

# Byte string (ASCII only)
byte_arr = np.array(['hello', 'world'], dtype='S')
print(byte_arr) # [b'hello' b'world']
print(byte_arr.dtype) # |S5 (5-byte string)

# Unicode string (default for text in Python 3)
unicode_arr = np.array(['hello', 'world'], dtype='U')
print(unicode_arr) # ['hello' 'world']
print(unicode_arr.dtype) # <U5 (5-character Unicode)

# Fixed-length strings
fixed = np.array(['hi', 'hello', 'hey'], dtype='U10')
print(fixed.dtype) # <U10 (max 10 chars)

Output:

[b'hello' b'world']
|S5
['hello' 'world']
<U5
<U10
tip

Use U (Unicode) for text data in Python 3. Use S (byte strings) only when working with binary data or legacy systems requiring ASCII.

String Length Inference

import numpy as np

# NumPy infers length from longest string
names = np.array(['Al', 'Bob', 'Charlotte'])
print(names.dtype) # <U9 (longest name is 9 chars)

# Shorter strings are padded internally
print(repr(names[0])) # np.str_('Al') (plain 'Al' on NumPy 1.x); stored as 9 chars

Output:

<U9
np.str_('Al')

Type Checking and Validation

Check If Specific Type

import numpy as np

arr = np.array([1.0, 2.0, 3.0])

# Direct comparison
print(arr.dtype == np.float64) # True

# Check type category
print(np.issubdtype(arr.dtype, np.floating)) # True
print(np.issubdtype(arr.dtype, np.integer)) # False
print(np.issubdtype(arr.dtype, np.number)) # True

Output:

True
True
False
True

Common Type Checks

import numpy as np

def describe_array(arr):
    """Describe the type characteristics of a NumPy array.

    Args:
        arr: Object exposing a NumPy ``dtype`` attribute (e.g. an ndarray).

    Returns:
        dict with the keys ``'dtype'`` (the dtype name), ``'is_integer'``,
        ``'is_float'``, ``'is_numeric'``, ``'is_bool'``, ``'is_string'``
        (all booleans) and ``'bytes_per_element'`` (the dtype item size).
    """
    dtype = arr.dtype

    return {
        'dtype': dtype.name,
        'is_integer': np.issubdtype(dtype, np.integer),
        'is_float': np.issubdtype(dtype, np.floating),
        'is_numeric': np.issubdtype(dtype, np.number),
        'is_bool': np.issubdtype(dtype, np.bool_),
        # Kind 'U' is Unicode text and 'S' is byte strings; both are string
        # data, so byte-string arrays must not be reported as non-strings.
        'is_string': dtype.kind in ('U', 'S'),
        'bytes_per_element': dtype.itemsize,
    }

# Test with different arrays
print(describe_array(np.array([1, 2, 3])))
print(describe_array(np.array([1.0, 2.0])))
print(describe_array(np.array(['a', 'b'])))

Output:

{'dtype': 'int64', 'is_integer': True, 'is_float': False, 'is_numeric': True, 'is_bool': False, 'is_string': False, 'bytes_per_element': 8}
{'dtype': 'float64', 'is_integer': False, 'is_float': True, 'is_numeric': True, 'is_bool': False, 'is_string': False, 'bytes_per_element': 8}
{'dtype': 'str32', 'is_integer': False, 'is_float': False, 'is_numeric': False, 'is_bool': False, 'is_string': True, 'bytes_per_element': 4}

Memory Optimization

Choose appropriate types to reduce memory usage:

import numpy as np

# One million small values (0..999) stored with the default integer type
large_default = np.arange(1_000_000) % 1000
print(f"Default (int64): {large_default.nbytes / 1e6:.1f} MB")

# Same data with a smaller type: every value fits in int16 (-32768..32767),
# so the conversion is lossless. Requesting int16 for values up to 999_999
# would overflow rather than save memory safely.
large_int16 = large_default.astype('int16')
print(f"int16: {large_int16.nbytes / 1e6:.1f} MB")

# int8 holds exactly 256 distinct values: -128..127
large_int8 = np.arange(-128, 128, dtype='int8')
print(f"int8: {large_int8.nbytes / 1e3:.1f} KB")

Output:

Default (int64): 8.0 MB
int16: 2.0 MB
int8: 0.3 KB

Choosing the Right Type

import numpy as np

def suggest_int_type(values):
    """Suggest the smallest NumPy integer type that can hold every value.

    Unsigned types are suggested when no value is negative; otherwise
    signed types are used.

    Args:
        values: Non-empty iterable of integers. One-shot iterators are
            accepted because the input is materialized once.

    Returns:
        The dtype name as a string, e.g. ``'uint8'`` or ``'int16'``.

    Raises:
        ValueError: If ``values`` is empty.
        OverflowError: If the values do not fit in any 64-bit integer type
            of the required signedness.
    """
    # Materialize once: calling min() and max() separately on an iterator
    # would exhaust it on the first call.
    items = list(values)
    if not items:
        raise ValueError("suggest_int_type() requires at least one value")

    min_val, max_val = min(items), max(items)

    if min_val >= 0:  # Unsigned
        candidates = ('uint8', 'uint16', 'uint32', 'uint64')
    else:  # Signed
        candidates = ('int8', 'int16', 'int32', 'int64')

    # Candidates are ordered smallest first, so the first fit is the answer.
    for name in candidates:
        limits = np.iinfo(np.dtype(name))
        if limits.min <= min_val and max_val <= limits.max:
            return name

    raise OverflowError(
        f"values in range [{min_val}, {max_val}] do not fit in {candidates[-1]}"
    )

# Usage
ages = [25, 30, 45, 60, 22]
print(suggest_int_type(ages)) # 'uint8'

temperatures = [-10, 5, 20, 35, -5]
print(suggest_int_type(temperatures)) # 'int8'

Output:

uint8
int8

Structured Arrays (Multiple Types)

Define arrays with multiple named fields of different types:

import numpy as np

# Define structured dtype
person_dtype = np.dtype([
('name', 'U20'),
('age', 'i4'),
('salary', 'f8')
])

# Create structured array
people = np.array([
('Alice', 30, 75000.0),
('Bob', 25, 65000.0),
('Charlie', 35, 85000.0)
], dtype=person_dtype)

# Access fields by name
print(people['name']) # ['Alice' 'Bob' 'Charlie']
print(people['age']) # [30 25 35]
print(people[0]) # ('Alice', 30, 75000.0) (NumPy 1.x prints 75000.)

Output:

['Alice' 'Bob' 'Charlie']
[30 25 35]
('Alice', 30, 75000.0)

Quick Reference

TaskCode
Check typearr.dtype
Check type namearr.dtype.name
Bytes per elementarr.dtype.itemsize
Total bytesarr.nbytes
Set type at creationnp.array([...], dtype='f4')
Convert typearr.astype('int32')
Check if integernp.issubdtype(arr.dtype, np.integer)
Check if floatnp.issubdtype(arr.dtype, np.floating)
Get type limitsnp.iinfo(np.int8) / np.finfo(np.float32)

Summary

Always verify array types with .dtype when debugging numerical issues or unexpected behavior.

Specify dtype at creation for memory optimization: use float32 instead of float64 for ML workloads, and choose the smallest integer type that fits your data range.

Understanding type shortcuts like i4 and f8 makes code more concise while maintaining clarity.