Insights from Data with BigQuery: Challenge Lab

 


GSP787 : Insights from Data with BigQuery: Challenge Lab :-


----------------------------------------------------------------------------------------------------------------------------------------------


// Navigation Menu -> BigQuery.


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 1 : Total Confirmed Cases :-



SELECT sum(cumulative_confirmed) as total_cases_worldwide FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where date='2020-04-15'


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 2 : Worst Affected Areas :-



with deaths_by_states as (


SELECT subregion1_name as state, sum(cumulative_deceased) as death_count


FROM `bigquery-public-data.covid19_open_data.covid19_open_data` 


where country_name="United States of America" and date='2020-04-10' and subregion1_name is NOT NULL


group by subregion1_name


)


select count(*) as count_of_states


from deaths_by_states


where death_count > 100


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 3 : Identifying Hotspots :-



SELECT * FROM (


SELECT subregion1_name as state, sum(cumulative_confirmed) as total_confirmed_cases


FROM `bigquery-public-data.covid19_open_data.covid19_open_data`


WHERE country_code="US" AND date='2020-04-10' AND subregion1_name is NOT NULL


GROUP BY subregion1_name


ORDER BY total_confirmed_cases DESC ) WHERE total_confirmed_cases > 1000


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 4 : Fatality Ratio :-



SELECT sum(cumulative_confirmed) as total_confirmed_cases, sum(cumulative_deceased) as total_deaths, (sum(cumulative_deceased)/sum(cumulative_confirmed))*100 as case_fatality_ratio 


FROM `bigquery-public-data.covid19_open_data.covid19_open_data` 


where country_name="Italy" AND date BETWEEN '2020-04-01'and '2020-04-30'


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 5 : Identifying specific day :-



SELECT date


FROM `bigquery-public-data.covid19_open_data.covid19_open_data` 


where country_name="Italy" and cumulative_deceased>10000


order by date asc


limit 1


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 6 : Finding days with zero net new cases :-



WITH india_cases_by_date AS (


  SELECT


    date,


    SUM( cumulative_confirmed ) AS cases


  FROM


    `bigquery-public-data.covid19_open_data.covid19_open_data`


  WHERE


    country_name ="India"


    AND date between '2020-02-21' and '2020-03-15'


  GROUP BY


    date


  ORDER BY


    date ASC 


 )


, india_previous_day_comparison AS 


(SELECT


  date,


  cases,


  LAG(cases) OVER(ORDER BY date) AS previous_day,


  cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases


FROM india_cases_by_date


)


select count(*)


from india_previous_day_comparison


where net_new_cases=0


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 7 : Doubling rate :-



WITH us_cases_by_date AS (


  SELECT


    date,


    SUM(cumulative_confirmed) AS cases


  FROM


    `bigquery-public-data.covid19_open_data.covid19_open_data`


  WHERE


    country_name="United States of America"


    AND date between '2020-03-22' and '2020-04-20'


  GROUP BY


    date


  ORDER BY


    date ASC 


 )


 


, us_previous_day_comparison AS 


(SELECT


  date,


  cases,


  LAG(cases) OVER(ORDER BY date) AS previous_day,


  cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases,


  (cases - LAG(cases) OVER(ORDER BY date))*100/LAG(cases) OVER(ORDER BY date) AS percentage_increase


FROM us_cases_by_date


)




select Date, cases as Confirmed_Cases_On_Day, previous_day as Confirmed_Cases_Previous_Day, percentage_increase as Percentage_Increase_In_Cases


from us_previous_day_comparison


where percentage_increase > 10


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 8 : Recovery rate :-



WITH cases_by_country AS (


  SELECT


    country_name AS country,


    sum(cumulative_confirmed) AS cases,


    sum(cumulative_recovered) AS recovered_cases


  FROM


    bigquery-public-data.covid19_open_data.covid19_open_data


  WHERE


    date = '2020-05-10'


  GROUP BY


    country_name


 )


 


, recovered_rate AS 


(SELECT


  country, cases, recovered_cases,


  (recovered_cases * 100)/cases AS recovery_rate


FROM cases_by_country


)




SELECT country, cases AS confirmed_cases, recovered_cases, recovery_rate


FROM recovered_rate


WHERE cases > 50000


ORDER BY recovery_rate desc


LIMIT 10


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 9 : CDGR - Cumulative Daily Growth Rate :-



WITH


  france_cases AS (


  SELECT


    date,


    SUM(cumulative_confirmed) AS total_cases


  FROM


    `bigquery-public-data.covid19_open_data.covid19_open_data`


  WHERE


    country_name="France"


    AND date IN ('2020-01-24',


      '2020-05-10')


  GROUP BY


    date


  ORDER BY


    date)


, summary as (


SELECT


  total_cases AS first_day_cases,


  LEAD(total_cases) OVER(ORDER BY date) AS last_day_cases,


  DATE_DIFF(LEAD(date) OVER(ORDER BY date),date, day) AS days_diff


FROM


  france_cases


LIMIT 1


)


select first_day_cases, last_day_cases, days_diff, POW((last_day_cases/first_day_cases),(1/days_diff))-1 as cdgr


from summary


----------------------------------------------------------------------------------------------------------------------------------------------


Task - 10 : Create a Datastudio report :-



SELECT


  date, SUM(cumulative_confirmed) AS country_cases,


  SUM(cumulative_deceased) AS country_deaths


FROM


  `bigquery-public-data.covid19_open_data.covid19_open_data`


WHERE


  date BETWEEN '2020-03-15'


  AND '2020-04-30'


  AND country_name ="United States of America"


GROUP BY date

Comments

Popular posts from this blog

Perform Foundational Data, ML, and AI Tasks in Google Cloud

Deploy to Kubernetes in Google Cloud: Challenge Lab