Storing large items in DynamoDB
Each item in a DynamoDB table is limited to a maximum size of 400 KB, including both the attribute names and the attribute values. The limit applies to every data type: strings, numbers, and binary data.
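As a rough sanity check, you can approximate an item's size before writing it. The sketch below is a simplification (DynamoDB has additional per-type rules for numbers, sets, and nested documents); it sums the UTF-8 length of each attribute name plus the length of each string or binary value:

```python
MAX_ITEM_SIZE = 400 * 1024  # 400 KB


def estimate_item_size(item):
    """Approximate DynamoDB item size for string/binary attributes only."""
    size = 0
    for name, value in item.items():
        size += len(name.encode("utf-8"))  # attribute names count too
        if isinstance(value, str):
            size += len(value.encode("utf-8"))
        elif isinstance(value, (bytes, bytearray)):
            size += len(value)
    return size


print(estimate_item_size({"pk": "lorem", "data": "x" * 500_000}) > MAX_ITEM_SIZE)
# prints True: DynamoDB would reject this item with a ValidationException
```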
The three best ways to mitigate the size limit are to partition the data across multiple items, to compress it, or to store it in S3 and keep only a reference in DynamoDB.
How to handle large data in DynamoDB
Partition the data
A simple way to get around the item size limit is to split the data across multiple items that share a partition key, using a sort key that preserves the chunk order.
Table Name: lorem
| pk    | sk     | data                   |
| ----- | ------ | ---------------------- |
| lorem | p#0000 | Lorem ipsum dolor sit… |
| lorem | p#0001 | Euismod nisi porta lo… |
| lorem | p#0002 | rcu risus quis varius… |
| lorem | p#0003 | phasellus. Enim praes… |
```python
import boto3


def partition_data(data, size):
    """Split data into chunks of at most `size` characters."""
    return [data[i : i + size] for i in range(0, len(data), size)]


# 100 paragraphs of Lorem ipsum
lorem = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna
aliqua. Sem integer vitae justo eget magna. At tellus at
urna condimentum mattis pellentesque id. Habitasse...
"""

dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("lorem")
partition_key = "lorem"
sort_key_prefix = "p#"  # p for partition

# Write chunks to DynamoDB. The chunk index is zero-padded so that the
# lexicographic order of the sort key matches the numeric chunk order
# (otherwise p#10 would sort before p#2).
chunks = partition_data(lorem, 5000)
for i, c in enumerate(chunks):
    table.put_item(
        Item={
            "pk": partition_key,
            "sk": f"{sort_key_prefix}{i:04d}",
            "data": c,
        }
    )

# Read chunks from DynamoDB in sort key order
query_kwargs = {
    "KeyConditionExpression": "pk = :pk and begins_with(sk, :sk)",
    "ExpressionAttributeValues": {
        ":pk": partition_key,
        ":sk": sort_key_prefix,
    },
    "ScanIndexForward": True,
}
response = table.query(**query_kwargs)
items = response["Items"]

# Keep querying until all paginated results have been fetched. The query
# arguments must be repeated on every page request.
while "LastEvaluatedKey" in response:
    response = table.query(
        ExclusiveStartKey=response["LastEvaluatedKey"], **query_kwargs
    )
    items.extend(response["Items"])

# Concatenate the data field from all the items
lorem_from_dynamodb = "".join(i["data"] for i in items)

print(lorem == lorem_from_dynamodb)  # prints True
```
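If the data splits into many chunks, the one-at-a-time `put_item` loop above can be replaced with boto3's `batch_writer`, which buffers writes into `BatchWriteItem` requests and resends unprocessed items automatically. A minimal sketch, reusing `table`, `chunks`, `partition_key`, and `sort_key_prefix` from the example above:

```python
# Same write loop as above, but batched: batch_writer groups puts into
# BatchWriteItem calls behind the scenes and flushes on exiting the block.
with table.batch_writer() as batch:
    for i, c in enumerate(chunks):
        batch.put_item(
            Item={
                "pk": partition_key,
                "sk": f"{sort_key_prefix}{i:04d}",
                "data": c,
            }
        )
```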
Compress the data
You can often avoid the limit by compressing the data before writing it. Compression algorithms like zlib and gzip can significantly reduce the size of text data.
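As a quick illustration of the potential savings (a sketch; the actual ratio depends entirely on how repetitive your data is):

```python
import zlib

text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " * 2000
raw = text.encode()
compressed = zlib.compress(raw)

# Highly repetitive text compresses extremely well; real-world JSON or
# log data will typically shrink less dramatically.
print(f"{len(raw)} bytes -> {len(compressed)} bytes")
```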
Compressing a single item
Table Name: lorem
| pk    | data                   |
| ----- | ---------------------- |
| lorem | eJy1Xdly3LquffdX6ANu+… |
```python
import boto3
import zlib


def compress_data(data):
    """Compress a string with zlib and return the compressed bytes."""
    return zlib.compress(data.encode())


# 100 paragraphs of Lorem ipsum
lorem = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna
aliqua. Sem integer vitae justo eget magna. At tellus at
urna condimentum mattis pellentesque id. Habitasse...
"""

dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("lorem")
partition_key = "lorem"

# The compressed bytes are stored as a DynamoDB Binary attribute
table.put_item(Item={"pk": partition_key, "data": compress_data(lorem)})

response = table.get_item(Key={"pk": partition_key})
data = response["Item"]["data"]  # boto3 returns a Binary wrapper

lorem_from_dynamodb = zlib.decompress(bytes(data)).decode()
print(lorem_from_dynamodb == lorem)  # prints True
```
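One trade-off to keep in mind: DynamoDB sees the compressed attribute as an opaque binary blob, so you can no longer filter on its contents or index it, and every reader has to decompress on the client side.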
Compressing a partitioned item
If the data is too large even after compression, combine both techniques: partition the data and compress each chunk.
DynamoDB schema:
Table Name: lorem
| pk    | sk     | data                   |
| ----- | ------ | ---------------------- |
| lorem | p#0000 | eJy1Xdly3LquffdX6ANu+… |
| lorem | p#0001 | eJytXFl227gS/c8quALtw… |
```python
import boto3
import zlib


def partition_data(data, size):
    """Split data into chunks of at most `size` characters."""
    return [data[i : i + size] for i in range(0, len(data), size)]


def compress_data(data):
    """Compress a string with zlib and return the compressed bytes."""
    return zlib.compress(data.encode())


# 100 paragraphs of Lorem ipsum
lorem = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna
aliqua. Sem integer vitae justo eget magna. At tellus at
urna condimentum mattis pellentesque id. Habitasse...
"""

dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("lorem")
partition_key = "lorem"
sort_key_prefix = "p#"  # p for partition

# Write compressed chunks to DynamoDB, zero-padding the chunk index so
# the sort key order matches the chunk order
chunks = partition_data(lorem, 50000)
for i, c in enumerate(chunks):
    table.put_item(
        Item={
            "pk": partition_key,
            "sk": f"{sort_key_prefix}{i:04d}",
            "data": compress_data(c),
        }
    )

# Read chunks from DynamoDB in sort key order
query_kwargs = {
    "KeyConditionExpression": "pk = :pk and begins_with(sk, :sk)",
    "ExpressionAttributeValues": {":pk": partition_key, ":sk": sort_key_prefix},
    "ScanIndexForward": True,
}
response = table.query(**query_kwargs)
items = response["Items"]

# Keep querying until all paginated results have been fetched
while "LastEvaluatedKey" in response:
    response = table.query(
        ExclusiveStartKey=response["LastEvaluatedKey"], **query_kwargs
    )
    items.extend(response["Items"])

# Decompress and concatenate the data field from all the items
lorem_from_dynamodb = "".join(
    zlib.decompress(bytes(i["data"])).decode() for i in items
)

print(lorem_from_dynamodb == lorem)  # prints True
```
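Note the ordering of the two steps: chunking first and compressing each chunk keeps every item independently decompressible, whereas compressing the whole string first and then chunking the compressed bytes would require fetching all chunks before any of the data can be decoded.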
Store the data in S3
For the largest payloads, store the data itself in S3 and keep only a reference to the S3 object as an attribute value in DynamoDB.
Table Name: lorem
| pk    | s3_key          |
| ----- | --------------- |
| lorem | s3://bucket/key |
```python
import boto3

# 100 paragraphs of Lorem ipsum
lorem = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna
aliqua. Sem integer vitae justo eget magna. At tellus at
urna condimentum mattis pellentesque id. Habitasse...
"""

bucket_name = "example-bucket"  # S3 bucket names cannot contain underscores
object_key = "object_key"
partition_key = "lorem"
s3_key = f"s3://{bucket_name}/{object_key}"

# Store data in S3 object
s3 = boto3.client("s3")
s3.put_object(Bucket=bucket_name, Key=object_key, Body=lorem.encode())

# Store reference to S3 object in DynamoDB
dynamodb = boto3.resource("dynamodb")
table = dynamodb.Table("lorem")
table.put_item(Item={"pk": partition_key, "s3_key": s3_key})

# Get reference to S3 object from DynamoDB
response = table.get_item(Key={"pk": partition_key})
s3_key = response["Item"]["s3_key"]

# Read contents of S3 object. Split on the first "/" only, since object
# keys may themselves contain slashes.
bucket, key = s3_key[5:].split("/", 1)  # drop the "s3://" prefix
response = s3.get_object(Bucket=bucket, Key=key)
lorem_from_s3 = response["Body"].read().decode()

print(lorem_from_s3 == lorem)  # prints True
```
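Since the S3 write and the DynamoDB write are separate operations with no transaction spanning them, write the object to S3 first and the reference second, and be prepared to handle the failure case where the object exists but the reference was never written.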