Integrate with Machine Learning APIs: Challenge Lab


GSP329 : Integrate with Machine Learning APIs: Challenge Lab :-


----------------------------------------------------------------------------------------------------------------------------------------------


// Run in Cloud Shell :-


export SANAME=challenge

gcloud iam service-accounts create $SANAME

gcloud projects add-iam-policy-binding $DEVSHELL_PROJECT_ID --member=serviceAccount:$SANAME@$DEVSHELL_PROJECT_ID.iam.gserviceaccount.com --role=roles/bigquery.admin

gcloud projects add-iam-policy-binding $DEVSHELL_PROJECT_ID --member=serviceAccount:$SANAME@$DEVSHELL_PROJECT_ID.iam.gserviceaccount.com --role=roles/storage.admin

gcloud iam service-accounts keys create sa-key.json --iam-account $SANAME@$DEVSHELL_PROJECT_ID.iam.gserviceaccount.com

export GOOGLE_APPLICATION_CREDENTIALS=${PWD}/sa-key.json

gsutil cp gs://$DEVSHELL_PROJECT_ID/analyze-images.py .
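

// Optional sanity check (not a lab step): confirm the exported key file loads before continuing. A minimal sketch; run python3 in Cloud Shell and paste :-


import json
import os

# GOOGLE_APPLICATION_CREDENTIALS was exported above and points at sa-key.json
key_path = os.environ['GOOGLE_APPLICATION_CREDENTIALS']

# A valid service-account key is a JSON file containing a client_email field
with open(key_path) as f:
    print('Using service account:', json.load(f)['client_email'])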



// Open the Editor and replace the contents of the "analyze-images.py" file with :-



# Dataset: image_classification_dataset
# Table name: image_text_detail

import os
import sys

# Import Google Cloud Library modules
from google.cloud import storage, bigquery, language, vision, translate_v2
from google.cloud import vision_v1

if ('GOOGLE_APPLICATION_CREDENTIALS' in os.environ):
    if (not os.path.exists(os.environ['GOOGLE_APPLICATION_CREDENTIALS'])):
        print("The GOOGLE_APPLICATION_CREDENTIALS file does not exist.\n")
        exit()
else:
    print("The GOOGLE_APPLICATION_CREDENTIALS environment variable is not defined.\n")
    exit()

if len(sys.argv) < 3:
    print('You must provide parameters for the Google Cloud project ID and Storage bucket')
    print('python3 ' + sys.argv[0] + ' [PROJECT_NAME] [BUCKET_NAME]')
    exit()

project_name = sys.argv[1]
bucket_name = sys.argv[2]

# Set up our GCS, BigQuery, and Natural Language clients
storage_client = storage.Client()
bq_client = bigquery.Client(project=project_name)
nl_client = language.LanguageServiceClient()

# Set up client objects for the Vision and Translation API libraries
vision_client = vision.ImageAnnotatorClient()
translate_client = translate_v2.Client()

# Set up the BigQuery dataset and table objects
dataset_ref = bq_client.dataset('image_classification_dataset')
dataset = bigquery.Dataset(dataset_ref)
table_ref = dataset.table('image_text_detail')
table = bq_client.get_table(table_ref)

# Create an array to store results data to be inserted into the BigQuery table
rows_for_bq = []

# Get a list of the files in the Cloud Storage bucket
bucket = storage_client.bucket(bucket_name)
files = bucket.list_blobs()

print('Processing image files from GCS. This will take a few minutes...')

# Process files from Cloud Storage and save the result to send to BigQuery
for file in files:
    if file.name.endswith('jpg') or file.name.endswith('png'):
        file_content = file.download_as_string()

        # Create a Vision API image object called image_object
        # Ref: https://googleapis.dev/python/vision/latest/gapic/v1/types.html#google.cloud.vision_v1.types.Image
        image_object = vision_v1.types.Image(content=file_content)

        # Detect text in the image and save the response data into an object called response
        # Ref: https://googleapis.dev/python/vision/latest/gapic/v1/api.html#google.cloud.vision_v1.ImageAnnotatorClient.document_text_detection
        response = vision_client.text_detection(image=image_object)

        # Save the text content found by the Vision API into a variable called text_data
        text_data = response.text_annotations[0].description

        # Save the text detection response data in <filename>.txt to Cloud Storage
        file_name = file.name.split('.')[0] + '.txt'
        blob = bucket.blob(file_name)
        # Upload the contents of the text_data string variable to the Cloud Storage file
        blob.upload_from_string(text_data, content_type='text/plain')

        # Extract the description and locale data from the response
        # into variables called desc and locale,
        # using response object properties e.g. response.text_annotations[0].description
        desc = response.text_annotations[0].description
        locale = response.text_annotations[0].locale

        # If the locale is English (en) save the description as the translated_text
        if locale == 'en':
            translated_text = desc
        else:
            # For non-EN locales pass the description data to the Translation API
            # Ref: https://googleapis.dev/python/translation/latest/client.html#google.cloud.translate_v2.client.Client.translate
            # Set the target_language locale to 'en'
            translation = translate_client.translate(desc, target_language='en')
            translated_text = translation['translatedText']
        print(translated_text)

        # If there is response data save the original text read from the image,
        # the locale, translated text, and filename
        if len(response.text_annotations) > 0:
            rows_for_bq.append((desc, locale, translated_text, file.name))

print('Writing Vision API image data to BigQuery...')

# Write the original text, locale, and translated text to BigQuery
errors = bq_client.insert_rows(table, rows_for_bq)
assert errors == []
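

// Optional: before running the full script you can smoke-test the Vision and Translation calls on a single image. This is just a sketch, not a lab step; 'YOUR_BUCKET_NAME' and 'sample.jpg' are placeholders for your lab bucket and any image it contains :-


from google.cloud import storage, vision, vision_v1, translate_v2

storage_client = storage.Client()
vision_client = vision.ImageAnnotatorClient()
translate_client = translate_v2.Client()

# 'YOUR_BUCKET_NAME' and 'sample.jpg' are placeholders; substitute your
# lab bucket and any image file it contains
content = storage_client.bucket('YOUR_BUCKET_NAME').blob('sample.jpg').download_as_string()

# Run text detection and inspect the first (full-text) annotation
annotation = vision_client.text_detection(
    image=vision_v1.types.Image(content=content)).text_annotations[0]
print(annotation.locale, '->', annotation.description)

# Translate the detected text to English
print(translate_client.translate(annotation.description,
                                 target_language='en')['translatedText'])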






// In Cloud Shell run (in this lab the bucket has the same name as the project, so the project ID is passed for both arguments) :-


python3 analyze-images.py $DEVSHELL_PROJECT_ID $DEVSHELL_PROJECT_ID


// Navigation Menu -> BigQuery, then run the following query :-


SELECT locale, COUNT(locale) AS lcount
FROM image_classification_dataset.image_text_detail
GROUP BY locale
ORDER BY lcount DESC
