# uncomment to install these libraries
# !pip install boto3 botocore
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import boto3
import botocore
import os
from IPython.display import clear_output
# Setup
# Unsigned S3 client: requests are sent without an AWS signature, so no
# credentials need to be configured.
s3 = boto3.client('s3', config=botocore.config.Config(signature_version=botocore.UNSIGNED))
bucket_name = 'openaq-fetches'  # S3 bucket that is listed and downloaded from
prefix = 'realtime-gzipped/'  # key prefix; the YYYY-MM-DD date string is appended to it
path = '/content/drive/MyDrive/IJCAI-21/data/OpenAQ-Delhi/'  # local root; object keys are appended to it
start_date = '2020/01/01'  # start date (inclusive)
end_date = '2020/12/31'  # end date (inclusive)
# Download
# Mirror every object under `prefix + YYYY-MM-DD` into `path`, one day at a
# time, keeping the S3 key as the relative local file path.
# A paginator is used because a single list call returns at most 1000 keys;
# without it, days with more objects would be silently truncated.
paginator = s3.get_paginator('list_objects_v2')
for day in pd.date_range(start=start_date, end=end_date):
    clear_output(wait=True)  # keep only the most recent progress line visible
    date = day.strftime('%Y-%m-%d')  # keeping just YYYY-MM-DD from YYYY-MM-DD HH:MM:SS
    print('Downloading:', date)
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix + date):
        # 'Contents' is absent when nothing matches the prefix; treat that
        # as an empty day instead of failing with KeyError.
        for file_obj in page.get('Contents', []):
            f_name = file_obj['Key']
            local_file = path + f_name
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(os.path.dirname(local_file), exist_ok=True)
            s3.download_file(bucket_name, f_name, local_file)
# Output captured with this cell in the original notebook:
#   Downloading: 2020-05-04
# Validate
# Re-list every day in the range and check that each listed object has a
# corresponding local file under `path`.
# A paginator is used because a single list call returns at most 1000 keys.
paginator = s3.get_paginator('list_objects_v2')
for day in pd.date_range(start=start_date, end=end_date):
    date = day.strftime('%Y-%m-%d')  # keeping just YYYY-MM-DD from YYYY-MM-DD HH:MM:SS
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix + date):
        # 'Contents' is absent when nothing matches the prefix; there is
        # nothing to validate for such a day.
        for file_obj in page.get('Contents', []):
            key = file_obj['Key']
            if not os.path.exists(path + key):
                # Raised explicitly (rather than via `assert`) so the check
                # still runs when Python is started with -O.
                raise AssertionError(key)
print('Validated')