Python NumPy: How to Check and Define NumPy Array Data Types in Python
Every NumPy array has a dtype attribute that determines how data is stored in memory. Understanding and controlling data types is essential for memory optimization, numerical precision, and compatibility with external libraries.
Checking Data Types with .dtype
The .dtype attribute reveals an array's data type:
import numpy as np
# Integer array
int_arr = np.array([1, 2, 3])
print(int_arr.dtype) # int64 (int32 on Windows with NumPy < 2.0)
# Float array
float_arr = np.array([1.0, 2.0, 3.0])
print(float_arr.dtype) # float64
# Mixed types promote to most general
mixed = np.array([1, 2.5, 3])
print(mixed.dtype) # float64
# String array
strings = np.array(['apple', 'banana'])
print(strings.dtype) # <U6 (Unicode, 6 chars max)
Output:
int64
float64
float64
<U6
Detailed Type Information
import numpy as np
arr = np.array([1.0, 2.0, 3.0])
print(f"dtype: {arr.dtype}") # float64
print(f"dtype name: {arr.dtype.name}") # 'float64'
print(f"item size: {arr.dtype.itemsize} bytes") # 8
print(f"total bytes: {arr.nbytes}") # 24 (3 elements × 8 bytes)
Output:
dtype: float64
dtype name: float64
item size: 8 bytes
total bytes: 24
Defining Type at Creation
Specify dtype when creating arrays to control storage:
import numpy as np
# Explicit float32 (uses half the memory of float64)
arr = np.array([1, 2, 3], dtype='float32')
print(arr.dtype) # float32
print(arr) # [1. 2. 3.]
# Explicit integer type
arr = np.array([1.9, 2.1, 3.5], dtype='int32')
print(arr) # [1 2 3] (truncated!)
# Boolean
arr = np.array([0, 1, 2, 0], dtype='bool')
print(arr) # [False True True False]
Output:
float32
[1. 2. 3.]
[1 2 3]
[False True True False]
With Array Creation Functions
import numpy as np
# zeros with specific type
zeros = np.zeros((3, 3), dtype='int8')
print(zeros.dtype) # int8
# ones with specific type
ones = np.ones((2, 2), dtype='float32')
print(ones.dtype) # float32
# arange with specific type
sequence = np.arange(10, dtype='float16')
print(sequence.dtype) # float16
# linspace with specific type
linear = np.linspace(0, 1, 5, dtype='float32')
print(linear.dtype) # float32
Output:
int8
float32
float16
float32
Type Specification Formats
NumPy accepts multiple formats for specifying types:
import numpy as np
# All equivalent ways to specify float64
np.array([1, 2, 3], dtype='float64')
np.array([1, 2, 3], dtype=np.float64)
np.array([1, 2, 3], dtype=float)
np.array([1, 2, 3], dtype='f8') # shorthand
Type Shortcuts Reference
| Symbol | Meaning | Examples |
|---|---|---|
| i | Signed integer | i1, i2, i4, i8 |
| u | Unsigned integer | u1, u2, u4, u8 |
| f | Floating point | f2, f4, f8 |
| c | Complex | c8, c16 |
| ? | Boolean | ?, b1, or bool (a bare b means int8, not Boolean) |
| S | Byte string | S10 (10 bytes) |
| U | Unicode string | U10 (10 chars) |
The number indicates bytes: i4 = 4-byte (32-bit) integer, f8 = 8-byte (64-bit) float. For U, the number is the maximum length in characters rather than bytes.
import numpy as np
# Using shorthand notation
arr_i4 = np.array([1, 2, 3], dtype='i4') # int32
arr_f4 = np.array([1, 2, 3], dtype='f4') # float32
arr_u1 = np.array([1, 2, 3], dtype='u1') # uint8
print(f"i4: {arr_i4.dtype}") # i4: int32
print(f"f4: {arr_f4.dtype}") # f4: float32
print(f"u1: {arr_u1.dtype}") # u1: uint8
Numeric Type Ranges
Understanding type limits prevents overflow:
import numpy as np
# Integer type limits
print(f"int8: {np.iinfo(np.int8).min} to {np.iinfo(np.int8).max}")
# int8: -128 to 127
print(f"int16: {np.iinfo(np.int16).min} to {np.iinfo(np.int16).max}")
# int16: -32768 to 32767
print(f"uint8: {np.iinfo(np.uint8).min} to {np.iinfo(np.uint8).max}")
# uint8: 0 to 255
# Float type precision
print(f"float32 precision: {np.finfo(np.float32).precision} digits")
# float32 precision: 6 digits
print(f"float64 precision: {np.finfo(np.float64).precision} digits")
# float64 precision: 15 digits
Output:
int8: -128 to 127
int16: -32768 to 32767
uint8: 0 to 255
float32 precision: 6 digits
float64 precision: 15 digits
String Types: S vs U
NumPy supports both byte strings and Unicode strings:
import numpy as np
# Byte string (ASCII only)
byte_arr = np.array(['hello', 'world'], dtype='S')
print(byte_arr) # [b'hello' b'world']
print(byte_arr.dtype) # |S5 (5-byte string)
# Unicode string (default for text in Python 3)
unicode_arr = np.array(['hello', 'world'], dtype='U')
print(unicode_arr) # ['hello' 'world']
print(unicode_arr.dtype) # <U5 (5-character Unicode)
# Fixed-length strings
fixed = np.array(['hi', 'hello', 'hey'], dtype='U10')
print(fixed.dtype) # <U10 (max 10 chars)
Output:
[b'hello' b'world']
|S5
['hello' 'world']
<U5
<U10
Use U (Unicode) for text data in Python 3. Use S (byte strings) only when working with binary data or legacy systems requiring ASCII.
String Length Inference
import numpy as np
# NumPy infers length from longest string
names = np.array(['Al', 'Bob', 'Charlotte'])
print(names.dtype) # <U9 (longest name is 9 chars)
# Shorter strings are padded internally
print(repr(names[0])) # np.str_('Al') in NumPy 2.x ('Al' in 1.x); stored as 9 chars
Output:
<U9
np.str_('Al')
Type Checking and Validation
Check If Specific Type
import numpy as np
arr = np.array([1.0, 2.0, 3.0])
# Direct comparison
print(arr.dtype == np.float64) # True
# Check type category
print(np.issubdtype(arr.dtype, np.floating)) # True
print(np.issubdtype(arr.dtype, np.integer)) # False
print(np.issubdtype(arr.dtype, np.number)) # True
Output:
True
True
False
True
Common Type Checks
import numpy as np
def describe_array(arr):
    """Describe array type characteristics.

    Only ``arr.dtype`` is inspected; the array's elements are never read.

    Parameters
    ----------
    arr : numpy.ndarray
        Any object exposing a NumPy ``dtype`` attribute.

    Returns
    -------
    dict
        ``'dtype'`` (the dtype name), the boolean flags ``'is_integer'``,
        ``'is_float'``, ``'is_numeric'``, ``'is_bool'`` and ``'is_string'``,
        and ``'bytes_per_element'`` (the dtype's item size).
    """
    dtype = arr.dtype
    return {
        'dtype': dtype.name,
        'is_integer': np.issubdtype(dtype, np.integer),
        'is_float': np.issubdtype(dtype, np.floating),
        'is_numeric': np.issubdtype(dtype, np.number),
        'is_bool': np.issubdtype(dtype, np.bool_),
        # np.character is the shared parent of np.str_ (U) and np.bytes_ (S),
        # so byte-string arrays are reported as strings as well.
        'is_string': np.issubdtype(dtype, np.character),
        'bytes_per_element': dtype.itemsize,
    }
# Test with different arrays
print(describe_array(np.array([1, 2, 3])))
print(describe_array(np.array([1.0, 2.0])))
print(describe_array(np.array(['a', 'b'])))
Output:
{'dtype': 'int64', 'is_integer': True, 'is_float': False, 'is_numeric': True, 'is_bool': False, 'is_string': False, 'bytes_per_element': 8}
{'dtype': 'float64', 'is_integer': False, 'is_float': True, 'is_numeric': True, 'is_bool': False, 'is_string': False, 'bytes_per_element': 8}
{'dtype': 'str32', 'is_integer': False, 'is_float': False, 'is_numeric': False, 'is_bool': False, 'is_string': True, 'bytes_per_element': 4}
Memory Optimization
Choose appropriate types to reduce memory usage:
import numpy as np
# One million small values (0-99) stored with the default integer type
large_default = np.arange(1_000_000) % 100
print(f"Default (int64): {large_default.nbytes / 1e6:.1f} MB")
# Same data with a smaller type; safe because every value fits in int16
large_int16 = large_default.astype('int16')
print(f"int16: {large_int16.nbytes / 1e6:.1f} MB")
# int8 has only 256 distinct values, spanning -128 to 127
large_int8 = np.arange(-128, 128, dtype='int8')
print(f"int8: {large_int8.nbytes / 1e3:.1f} KB")
Output:
Default (int64): 8.0 MB
int16: 2.0 MB
int8: 0.3 KB
Choosing the Right Type
import numpy as np
def suggest_int_type(values):
    """Suggest smallest integer type that fits values.

    Parameters
    ----------
    values : iterable of numbers
        The values to be stored. Any iterable is accepted, including
        single-pass iterators such as generators.

    Returns
    -------
    str
        An unsigned type name ('uint8' ... 'uint64') when no value is
        negative, otherwise a signed type name ('int8' ... 'int64').

    Raises
    ------
    ValueError
        If ``values`` is empty, or if its range does not fit in any
        integer type of up to 64 bits.
    """
    # Materialize once so a generator can be scanned for both extremes.
    items = list(values)
    if not items:
        raise ValueError("suggest_int_type() requires at least one value")
    min_val, max_val = min(items), max(items)

    if min_val >= 0:
        # No negatives: unsigned types double the usable positive range.
        candidates = (
            ('uint8', 0, 255),
            ('uint16', 0, 65535),
            ('uint32', 0, 4294967295),
            ('uint64', 0, 18446744073709551615),
        )
    else:
        candidates = (
            ('int8', -128, 127),
            ('int16', -32768, 32767),
            ('int32', -2147483648, 2147483647),
            ('int64', -9223372036854775808, 9223372036854775807),
        )

    # Candidates are ordered narrowest first, so the first fit is the smallest.
    for type_name, low, high in candidates:
        if low <= min_val and max_val <= high:
            return type_name
    raise ValueError(
        f"values range from {min_val} to {max_val}, which does not fit in "
        "any integer type of up to 64 bits"
    )
# Usage
ages = [25, 30, 45, 60, 22]
print(suggest_int_type(ages)) # 'uint8'
temperatures = [-10, 5, 20, 35, -5]
print(suggest_int_type(temperatures)) # 'int8'
Output:
uint8
int8
Structured Arrays (Multiple Types)
Define arrays with multiple named fields of different types:
import numpy as np
# Define structured dtype
person_dtype = np.dtype([
('name', 'U20'),
('age', 'i4'),
('salary', 'f8')
])
# Create structured array
people = np.array([
('Alice', 30, 75000.0),
('Bob', 25, 65000.0),
('Charlie', 35, 85000.0)
], dtype=person_dtype)
# Access fields by name
print(people['name']) # ['Alice' 'Bob' 'Charlie']
print(people['age']) # [30 25 35]
print(people[0]) # ('Alice', 30, 75000.0)
Output:
['Alice' 'Bob' 'Charlie']
[30 25 35]
('Alice', 30, 75000.0)
Quick Reference
| Task | Code |
|---|---|
| Check type | arr.dtype |
| Check type name | arr.dtype.name |
| Bytes per element | arr.dtype.itemsize |
| Total bytes | arr.nbytes |
| Set type at creation | np.array([...], dtype='f4') |
| Convert type | arr.astype('int32') |
| Check if integer | np.issubdtype(arr.dtype, np.integer) |
| Check if float | np.issubdtype(arr.dtype, np.floating) |
| Get type limits | np.iinfo(np.int8) / np.finfo(np.float32) |
Summary
Always verify array types with .dtype when debugging numerical issues or unexpected behavior.
Specify dtype at creation for memory optimization: use float32 instead of float64 for ML workloads, and choose the smallest integer type that fits your data range.
Understanding type shortcuts like i4 and f8 makes code more concise while maintaining clarity.