Each item in a DynamoDB table has a maximum size of 400 KB, and that limit includes both the attribute names and the attribute values. It applies regardless of data type: strings, numbers, and binary data all count toward it.
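
To get a rough sense of how close an item is to the limit, you can approximate its size by summing the UTF-8 byte lengths of attribute names and values. Here is a minimal sketch covering string and binary attributes (numbers, sets, lists, and maps have extra encoding overhead, so treat this as a lower bound):

def approximate_item_size(item: dict) -> int:
    """Rough lower-bound estimate of a DynamoDB item's size in bytes."""
    size = 0
    for name, value in item.items():
        size += len(name.encode("utf-8"))  # attribute names count too
        if isinstance(value, str):
            size += len(value.encode("utf-8"))
        elif isinstance(value, (bytes, bytearray)):
            size += len(value)
    return size

print(approximate_item_size({"pk": "lorem", "data": "x" * 399_000}))  # 399006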

Three effective ways to work around the size limit:

  1. Partitioning the data
  2. Compressing the data
  3. Storing the data in S3

How to handle large data in DynamoDB

Partition the data

A simple way to get around the item size limit is to split the data into multiple items, then reassemble it when reading. Each chunk lives under the same partition key and gets a numbered sort key:

Table Name: lorem

pk    | sk  | data
------|-----|------------------------
lorem | p#0 | Lorem ipsum dolor sit…
lorem | p#1 | Euismod nisi porta lo…
lorem | p#2 | rcu risus quis varius…
lorem | p#3 | phasellus. Enim praes…
import boto3

def partition_data(data, size):
    return [data[i:i+size] for i in range(0, len(data), size)]


# 100 paragraphs of Lorem ipsum
lorem = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna 
aliqua. Sem integer vitae justo eget magna. At tellus at 
urna condimentum mattis pellentesque id. Habitasse...
"""

dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("lorem")
partition_key = "lorem"
sort_key_prefix = "p#" # p for partition

# Write chunks to DynamoDB
chunks = partition_data(lorem, 5000)
for i, c in enumerate(chunks):
    table.put_item(
        Item={
            "pk": partition_key,
            "sk": f"{sort_key_prefix}{i}",
            "data": c
        }
    )

# Read chunks from DynamoDB
response = table.query(
    KeyConditionExpression="pk = :pk and begins_with(sk, :sk)",
    ExpressionAttributeValues={
        ":pk": partition_key,
        ":sk": sort_key_prefix
    },
    ScanIndexForward=True
)

# Page through all results if the response was truncated.
items = response["Items"]
while "LastEvaluatedKey" in response:
    # Query requires the key condition on every call, including paginated ones.
    response = table.query(
        KeyConditionExpression="pk = :pk and begins_with(sk, :sk)",
        ExpressionAttributeValues={
            ":pk": partition_key,
            ":sk": sort_key_prefix
        },
        ExclusiveStartKey=response["LastEvaluatedKey"]
    )
    items.extend(response["Items"])  # Items is a list, so extend, not update

# Concatenate the data field from all the items
lorem_from_dynamodb = "".join(i["data"] for i in items)

print(lorem == lorem_from_dynamodb) # prints True

Compress the data

Another option is to shrink the data with compression before writing it. General-purpose algorithms such as zlib (the DEFLATE implementation behind gzip) can significantly reduce the size of text data.
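
How much you save depends entirely on the data: repetitive text compresses far better than random bytes. A quick sketch to measure the ratio for a payload of your own:

import zlib

payload = "Lorem ipsum dolor sit amet, " * 2000  # ~56 KB of repetitive text
compressed = zlib.compress(payload.encode())
print(f"{len(payload.encode())} -> {len(compressed)} bytes")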

Compressing a single item

Table Name: lorem

pk    | data
------|--------------------------
lorem | eJy1Xdly3LquffdX6ANu+…
lorem | eJytXFl227gS/c8quALtw…
import boto3
import zlib


def compress_data(data):
    return zlib.compress(data.encode())


# 100 paragraphs of Lorem ipsum
lorem = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna 
aliqua. Sem integer vitae justo eget magna. At tellus at 
urna condimentum mattis pellentesque id. Habitasse...
"""

dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("lorem")
partition_key = "lorem"

table.put_item(Item={"pk": partition_key, "data": compress_data(lorem)})

response = table.get_item(Key={"pk": partition_key})
data = response["Item"]["data"]  # boto3 returns binary values wrapped in a Binary object

lorem_from_dynamodb = zlib.decompress(bytes(data)).decode()
print(lorem_from_dynamodb == lorem)  # prints True

Compressing a partitioned item

DynamoDB schema:

Table Name: lorem

pk    | sk  | data
------|-----|--------------------------
lorem | p#0 | eJy1Xdly3LquffdX6ANu+…
lorem | p#1 | eJytXFl227gS/c8quALtw…
import boto3
import zlib


def partition_data(data, size):
    return [data[i : i + size] for i in range(0, len(data), size)]


def compress_data(data):
    return zlib.compress(data.encode())


# 100 paragraphs of Lorem ipsum
lorem = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna 
aliqua. Sem integer vitae justo eget magna. At tellus at 
urna condimentum mattis pellentesque id. Habitasse...
"""

dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("lorem")
partition_key = "lorem"
sort_key_prefix = "p#"  # p for partition

# Write chunks to DynamoDB
chunks = partition_data(lorem, 50000)
for i, c in enumerate(chunks):
    table.put_item(
        Item={
            "pk": partition_key,
            "sk": f"{sort_key_prefix}{i}",
            "data": compress_data(c),
        }
    )

# Read chunks from DynamoDB
response = table.query(
    KeyConditionExpression="pk = :pk and begins_with(sk, :sk)",
    ExpressionAttributeValues={":pk": partition_key, ":sk": sort_key_prefix},
    ScanIndexForward=True,
)

# Page through all results if the response was truncated.
items = response["Items"]
while "LastEvaluatedKey" in response:
    # Query requires the key condition on every call, including paginated ones.
    response = table.query(
        KeyConditionExpression="pk = :pk and begins_with(sk, :sk)",
        ExpressionAttributeValues={":pk": partition_key, ":sk": sort_key_prefix},
        ExclusiveStartKey=response["LastEvaluatedKey"],
    )
    items.extend(response["Items"])  # Items is a list, so extend, not update

# Concatenate the data field from all the items
lorem_from_dynamodb = "".join(
    zlib.decompress(bytes(i["data"])).decode() for i in items
)

print(lorem_from_dynamodb == lorem)  # prints True

Store the data in S3

For very large payloads, consider storing the data itself in S3 and keeping only a reference to the object as an attribute value in DynamoDB.

Table Name: lorem

pk    | s3_key
------|-----------------
lorem | s3://bucket/key
import boto3

# 100 paragraphs of Lorem ipsum
lorem = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna 
aliqua. Sem integer vitae justo eget magna. At tellus at 
urna condimentum mattis pellentesque id. Habitasse...
"""

bucket_name = "bucket-name"  # S3 bucket names cannot contain underscores
object_key = "object_key"
partition_key = "lorem"
s3_key = f"s3://{bucket_name}/{object_key}"

# Store data in S3 object
s3 = boto3.client("s3")
s3.put_object(Bucket=bucket_name, Key=object_key, Body=lorem.encode())

# Store reference to S3 object in DynamoDB
dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("lorem")
table.put_item(Item={"pk": partition_key, "s3_key": s3_key})

# Get reference to S3 object in DynamoDB
response = table.get_item(Key={"pk": partition_key})
s3_key = response["Item"]["s3_key"]

# Read contents of S3 object
bucket, key = s3_key[5:].split("/", 1)  # drop "s3://"; split on the first "/" only so keys may contain slashes
response = s3.get_object(Bucket=bucket, Key=key)
lorem_from_s3 = response["Body"].read().decode()

print(lorem_from_s3 == lorem) # prints True