How to List AWS S3 Bucket Names and Prefixes

If you need more information about the objects and buckets in your S3 data lake, the quickest way to get it is usually the AWS SDK or the AWS CLI.

In this example, we’ll be using the boto3 Python SDK to achieve two goals:

Find all bucket names and prefixes
Find all bucket names and keys

Prerequisites:

Configure the AWS CLI on your machine
Install Python
Install boto3 (Run: pip install boto3)

Find All Bucket Names and Prefixes

import boto3

# Low-level client: the source of the list_objects_v2 paginator below.
s3_client = boto3.client("s3")
# Resource handle: main() uses it to enumerate the buckets.
s3_resource = boto3.resource('s3')

# Shared by get_matching_s3_objects to page through a bucket's listing.
paginator = s3_client.get_paginator("list_objects_v2")

def get_matching_s3_objects(bucket):
    """
    Generate all CommonPrefixes in an S3 bucket.

    The listing is requested with '/' as the delimiter and every page of
    the result is inspected.

    :param bucket: Name of the S3 bucket.
    :return: Generator over the entries of each page's "CommonPrefixes" list.
    """
    kwargs = {'Bucket': bucket, 'Delimiter': '/'}

    for page in paginator.paginate(**kwargs):
        # A page that holds only keys has no "CommonPrefixes" entry, but a
        # later page may still have one, so skip the page instead of
        # ending the whole listing.
        yield from page.get("CommonPrefixes", [])

def get_matching_s3_prefixes(bucket):
    """
    Yield the "Prefix" value of every CommonPrefixes entry in a bucket.

    :param bucket: Name of the S3 bucket.
    """
    common_prefixes = get_matching_s3_objects(bucket)
    yield from (entry["Prefix"] for entry in common_prefixes)

def main():
    """
    Print a "bucket,prefix" line for every prefix found in every bucket.

    A bucket whose listing raises is reported and skipped, so the
    remaining buckets are still processed.
    """
    for bucket in s3_resource.buckets.all():
        try:
            for prefix in get_matching_s3_prefixes(bucket.name):
                # Drop the '/' delimiter so only the bare name is printed.
                prefix = prefix.replace('/','')
                print(f"{bucket.name},{prefix}")
        # Exception rather than a bare except: Ctrl-C (KeyboardInterrupt)
        # and SystemExit must still be able to stop the script.
        except Exception:
            print(f"Cannot access bucket: {bucket.name}")


if __name__ == '__main__':
    main()

Find All Bucket Names and Keys

import boto3

# Low-level S3 client, used to build the paginator.
s3 = boto3.client("s3")
# list_objects_v2 paginator shared by get_matching_s3_objects below.
paginator = s3.get_paginator("list_objects_v2")

def get_matching_s3_objects(bucket):
    """
    Yield every entry of the "Contents" list from each listing page.

    Iteration ends at the first page that carries no "Contents" key.

    :param bucket: Name of the S3 bucket.
    """
    for page in paginator.paginate(Bucket=bucket):
        if "Contents" not in page:
            return
        yield from page["Contents"]

def get_matching_s3_keys(bucket):
    """
    Yield the "Key" value of every object listed in a bucket.

    :param bucket: Name of the S3 bucket.
    """
    listed_objects = get_matching_s3_objects(bucket)
    yield from (entry["Key"] for entry in listed_objects)

def main(bucket='voyager-demo-curated'):
    """
    Print every key in a bucket as "bucket/key", one per line.

    :param bucket: Name of the S3 bucket to list. The default is the demo
        bucket this example was written against; pass your own bucket name.
    """
    # NOTE: to cover several buckets, call main() once per bucket name
    # (presumably the names can be taken from the client's list_buckets
    # call - confirm against the boto3 documentation).
    for key in get_matching_s3_keys(bucket):
        print(f"{bucket}/{key}")


if __name__ == '__main__':
    main()

Find All Bucket Names and Prefixes

Find All Bucket Names and Keys

About Daniel Andrews