# uncomment to install these libraries
# !pip install boto3 botocore
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import boto3
import botocore
import os
from IPython.display import clear_output

# Setup
# S3 client configured with signature_version=UNSIGNED, i.e. requests are
# sent without AWS credentials.
s3 = boto3.client(
    's3',
    config=botocore.config.Config(signature_version=botocore.UNSIGNED),
)

# Bucket and key prefix used for every listing/download call.
bucket_name = 'openaq-fetches'
prefix = 'realtime-gzipped/'

# Local root directory; S3 keys are appended to it verbatim, so it must keep
# its trailing slash.
path = '/content/drive/MyDrive/IJCAI-21/data/OpenAQ-Delhi/'

# Date range to process; both ends are inclusive.
start_date = '2020/01/01'
end_date = '2020/12/31'

# Download
# Mirror every object whose key starts with <prefix><YYYY-MM-DD>, for each day
# in [start_date, end_date], into `path`, using the S3 key as the relative
# file path.
#
# A paginator is used because a single list call returns at most 1000 keys;
# without it, any keys beyond the first page would be silently skipped.
paginator = s3.get_paginator('list_objects_v2')
for day in pd.date_range(start=start_date, end=end_date):
    clear_output(wait=True)  # keep only the latest progress line on screen
    date = day.strftime('%Y-%m-%d')  # listing prefix uses YYYY-MM-DD
    print('Downloading:', date)
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix + date):
        # 'Contents' is absent from the response when no key matches the
        # prefix (e.g. a day with no data), so default to an empty list.
        for file_obj in page.get('Contents', []):
            f_name = file_obj['Key']
            local_file = path + f_name
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(os.path.dirname(local_file), exist_ok=True)
            s3.download_file(bucket_name, f_name, local_file)
# NOTE(review): the exported notebook had the text "Downloading: 2020-05-04"
# fused onto the last line of this cell; it looks like captured cell output.
# Validate
# Re-list the same date range on S3 and confirm that every listed key exists
# under `path` on the local filesystem. Stops at the first missing file with
# an AssertionError whose message is the missing S3 key.
#
# A paginator is used because a single list call returns at most 1000 keys;
# without it, keys beyond the first page would never be checked.
paginator = s3.get_paginator('list_objects_v2')
for day in pd.date_range(start=start_date, end=end_date):
    date = day.strftime('%Y-%m-%d')  # listing prefix uses YYYY-MM-DD
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix + date):
        # 'Contents' is absent from the response when no key matches the
        # prefix, so default to an empty list instead of raising KeyError.
        for file_obj in page.get('Contents', []):
            if not os.path.exists(path + file_obj['Key']):
                # Explicit raise rather than `assert`, so the check is not
                # stripped when Python runs with -O; the exception type and
                # message are the same as the assert would produce.
                raise AssertionError(file_obj['Key'])
print('Validated')