This Python notebook compares two database tables and finds differences between them. The code highlights the following:

  • differences in column names
  • differences in row count
  • TransactionIds (the common unique identifier) in only one of the two tables
  • differences between the other columns (following an inner join on TransactionId)

The outputs (listed after the print statement in the final cell) include CSVs, text files, and a summary image.

Name data sources and configure settings¶

In [1]:
# Labels for the two data sources; used in printed messages and in
# output file/folder names (df1 = first source, df2 = second source).
source_data_name1 = 'cloud'
source_data_name2 = 'prem'

# When True, null-like placeholders ('NaT', 'None', 'nan', 'NULL', '')
# are normalised to the string '0' before comparison.
replace_null_like_values_with_0 = True
# When True, values are lower-cased, trimmed, and stripped of hyphens and
# apostrophes before comparison.  NOTE(review): name contains a typo
# ("normlaise"); left as-is because a later cell references it.
normlaise_case_trim_remove_special_chars = True
# When True, the columns named in a later cell (e.g. 'Broker') are dropped
# before comparing.
remove_named_columns = False

Import modules¶

In [2]:
import pandas as pd
import numpy as np
import collections
import re
import pyperclip
import itertools
import os
from win32api import MessageBox
from datetime import datetime
import os
import shutil
import sqlite3
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)

%autosave 0
Autosave disabled

Make output folder (with timestamp)¶

In [3]:
# Date stamp used to tag this run's output folder and file names.
str_current_datetime = str(datetime.now().strftime("%Y-%m-%d"))

output_folder = "Outputs/" + source_data_name1 + " vs " + source_data_name2 + " (" + str_current_datetime + ")"

# Start from a clean folder: remove any previous run for the same date.
# Only "folder does not exist" is expected; other errors (e.g. a file held
# open by Excel) should surface rather than be silently swallowed, which is
# what the original bare `except:` did.
try:
    shutil.rmtree(output_folder)
except FileNotFoundError:
    pass
os.makedirs(output_folder)

# Sub-folders for the column-name comparison and missing-identifier exports.
output_folder_column_names = output_folder + "/Column name comparison"
os.makedirs(output_folder_column_names)

output_folder_UI = output_folder + "/Unique identifiers missing in each"
os.makedirs(output_folder_UI)

Import data (into df1 and df2)¶

In [4]:
# Open the local SQLite database holding both tables to compare.
# NOTE(review): relative path — assumes the notebook runs from its own folder.
conn = sqlite3.connect('Inputs/database.db')
In [5]:
# Load the first (cloud-side) transactions table.
sql1 = "SELECT * FROM trans_c"
df1 = pd.read_sql_query(sql1, conn)

# Compare everything as text so numeric/string representation differences
# are handled uniformly by the later normalisation steps.
df1 = df1.astype(str)
df1_records_with_dups = len(df1)

print(f"{source_data_name1} data has {df1_records_with_dups} records and {df1.shape[1]} columns")
cloud data has 200 records and 7 columns
In [6]:
# Load the second (on-prem) transactions table.
sql2 = "SELECT * FROM trans_p"
df2 = pd.read_sql_query(sql2, conn)

# Same string normalisation as for the first source.
df2 = df2.astype(str)
df2_records_with_dups = len(df2)

print(f"{source_data_name2} data has {df2_records_with_dups} records and {df2.shape[1]} columns")
prem data has 201 records and 7 columns
In [7]:
# Close the SQLite connection now that both tables are loaded.
# BUG FIX: the original read `conn.close` (no parentheses), which only
# displayed the bound method (see the cell's Out) and never actually
# closed the connection.
conn.close()
Out[7]:
<function Connection.close>

Export row count text file¶

In [8]:
# Summarise the raw (pre-filtering) row counts in a text file and on screen.
row_count_message = "The row count differs by " + str(abs(df1_records_with_dups - df2_records_with_dups))

row_count = output_folder + "/Row count " + source_data_name1 + " vs " + source_data_name2 + ".txt"
with open(row_count, "w") as f:
    f.write(row_count_message + " (before any processing/filtering)")
    f.write("\n" + source_data_name1 + " has " + str(df1_records_with_dups) + " rows\n")
    # FIX: the original wrote " row" (singular) for the second source.
    f.write(source_data_name2 + " has " + str(df2_records_with_dups) + " rows")

print(row_count_message)
The row count differs by 1

Drop columns that are not of interest (optional)

In [9]:
# Optionally exclude columns that should not take part in the comparison.
if remove_named_columns:
    columns_to_delete = ['Broker']

    # Index.intersection keeps only the names actually present, so a listed
    # column that is missing from one source is silently skipped (as before).
    df1 = df1.drop(columns=df1.columns.intersection(columns_to_delete))
    df2 = df2.drop(columns=df2.columns.intersection(columns_to_delete))

Perform column name comparison¶

In [10]:
# Compare the two column-name sets and report names unique to either source.
df1_columns = set(df1.columns.values.tolist())
df2_columns = set(df2.columns.values.tolist())

df1_columns_dif = df1_columns.difference(df2_columns)
df2_columns_dif = df2_columns.difference(df1_columns)

# BUG FIX: the original tested `len(df1_columns) + len(df2_columns) == 0`
# (the total number of columns, never zero for non-empty tables), so the
# "appears in one source only" branch was unreachable and the notebook
# always claimed the column names matched.  Test the *difference* sets.
if len(df1_columns_dif) + len(df2_columns_dif) > 0:
    print("These columns names appear in " + source_data_name1 + " only...")
    print(df1_columns_dif)

    print("\nThese columns names appear in " + source_data_name2 + " only...")
    print(df2_columns_dif)
else:
    print("The data sets have the same column names.")

print("\nThese columns names appear in both (and will be compared)...")
column_names_in_both = df2_columns.intersection(df1_columns)
print(column_names_in_both)
The data sets have the same column names.

These columns names appear in both (and will be compared)...
{'IsRenewal', 'PolicyId', 'TransactionDate', 'Amount', 'TransactionId', 'Broker', 'Currency'}
In [11]:
def export_name_list(path_without_extension, names):
    """Write one name per line to `<path>.txt`, appending ' (empty)' to the
    file name when there is nothing to write (matches the original naming)."""
    suffix = ' (empty)' if len(names) == 0 else ''
    with open(path_without_extension + suffix + ".txt", 'w') as f:
        for item in names:
            f.write("%s\n" % item)


# The original repeated the same write logic three times; one helper covers
# columns unique to each source plus the shared columns to be compared.
export_name_list(output_folder_column_names + "/Columns in df1 only", df1_columns_dif)
export_name_list(output_folder_column_names + "/Columns in df2 only", df2_columns_dif)
export_name_list(
    output_folder_column_names + "/Columns in " + source_data_name1 + " AND " + source_data_name2,
    column_names_in_both,
)

Remove duplicate rows (if they exist)¶

In [12]:
# Remove exact duplicate rows from the first source and report how many went.
df1 = df1.drop_duplicates()
df1_records_no_dups = len(df1)
d1_dups_removed = str(df1_records_with_dups - df1_records_no_dups)
print(f"Dataframe from {source_data_name1} now has {df1_records_no_dups} records ({d1_dups_removed} were removed) and {df1.shape[1]} columns")
Dataframe from cloud now has 200 records (0 were removed) and 7 columns
In [13]:
# Remove exact duplicate rows from the second source and report how many went.
df2 = df2.drop_duplicates()
df2_records_no_dups = len(df2)
d2_dups_removed = str(df2_records_with_dups - df2_records_no_dups)
print(f"Dataframe from {source_data_name2} now has {df2_records_no_dups} records ({d2_dups_removed} were removed) and {df2.shape[1]} columns")
Dataframe from prem now has 201 records (0 were removed) and 7 columns

Strip trailing ".0" decimals and (optionally) clean data¶

In [14]:
# Strip trailing ".0"/".00"/... so values like "42.0" and "42" compare equal.
# FIX: raw strings replace the invalid "\." escape sequences in the original
# string literals (a DeprecationWarning in modern Python).
df1 = df1.replace(r'\.0+$', '', regex=True)
df2 = df2.replace(r'\.0+$', '', regex=True)

if replace_null_like_values_with_0:
    # Collapse all null-like placeholders to the single token '0' so that
    # "missing" on one side vs "missing" on the other is not a discrepancy.
    df1 = df1.replace(['NaT', 'None', '0', 'nan', 'NULL', ''], '0')
    df2 = df2.replace(['NaT', 'None', '0', 'nan', 'NULL', ''], '0')

if normlaise_case_trim_remove_special_chars:
    # Lower-case and trim every string cell (single chained pass instead of
    # the original's two applies), then drop hyphens and apostrophes, so
    # cosmetic formatting differences do not register as discrepancies.
    df1 = df1.apply(lambda x: x.str.lower().str.strip() if x.dtype == 'object' else x)
    df1 = df1.replace(re.compile(r"\-|'"), '', regex=True)
    df2 = df2.apply(lambda x: x.str.lower().str.strip() if x.dtype == 'object' else x)
    df2 = df2.replace(re.compile(r"\-|'"), '', regex=True)

Identify unique/almost unique identifiers (for join)¶

df1¶

In [15]:
determine_unique = []

# Keep only columns where at least 90% of values are unique, rounded to one
# decimal place (matching the displayed percentage).  FIX: the original built
# every message and then filtered with a regex over the formatted strings
# (r'100\.|9[0-9]\.'), which is fragile; compare the numbers directly —
# rounding to 1 d.p. reproduces the regex's behaviour exactly.
for col in df1.columns:
    num_unique = len(df1[col].unique())
    total = len(df1[col])
    pct_unique = (num_unique / total) * 100
    if round(pct_unique, 1) >= 90.0:
        determine_unique.append(f"{pct_unique:.1f}% of values in '{col}' are unique ({num_unique} unique values out of {total})")

print("These columns have at least 90% unique values...")
determine_unique.sort()
determine_unique
These columns have at least 90% unique values...
Out[15]:
["100.0% of values in 'Amount' are unique (200 unique values out of 200)",
 "100.0% of values in 'TransactionId' are unique (200 unique values out of 200)"]
In [16]:
# Pull the column names back out of the summary strings (the text between
# "in '" and the closing quote), flattening into one list as we go.
cleaned_list = []
for item in determine_unique:
    cleaned_list.extend(re.findall(r"in '([^']+)", item))
df1_examine = df1[cleaned_list]
df1_examine.head(3)
Out[16]:
Amount TransactionId
0 47259 8028676
1 48114 1488580
2 19018 3095155

df2¶

In [17]:
determine_unique = []

# Same >= 90%-unique filter as for the first source.  FIX: numeric filter
# (rounded to 1 d.p., matching the displayed percentage) replaces the
# original's fragile regex over the formatted strings.
for col in df2.columns:
    num_unique = len(df2[col].unique())
    total = len(df2[col])
    pct_unique = (num_unique / total) * 100
    if round(pct_unique, 1) >= 90.0:
        determine_unique.append(f"{pct_unique:.1f}% of values in '{col}' are unique ({num_unique} unique values out of {total})")

print("These columns have at least 90% unique values...")
determine_unique.sort()
determine_unique
These columns have at least 90% unique values...
Out[17]:
["100.0% of values in 'Amount' are unique (201 unique values out of 201)",
 "100.0% of values in 'TransactionId' are unique (201 unique values out of 201)"]
In [18]:
# Extract the candidate column names from the summary strings for df2,
# flattening into a single list in one pass.
cleaned_list = []
for item in determine_unique:
    cleaned_list.extend(re.findall(r"in '([^']+)", item))
df2_examine = df2[cleaned_list]
df2_examine.head(3)
Out[18]:
Amount TransactionId
0 47259 8028676
1 48114 1488580
2 19018 3095155

Define joining key¶

In [19]:
# DEFINE UNIQUE IDENTIFIER COLUMN FOR df1
df1['UniqueIdentifier'] = df1['TransactionId']

# DEFINE UNIQUE IDENTIFIER COLUMN FOR df2
df2['UniqueIdentifier'] = df2['TransactionId']

Remove unique identifiers/almost unique identifiers from dfs if not unique¶

In [20]:
# Any identifier duplicated in EITHER source is removed from BOTH sources,
# so the later inner join stays strictly one-to-one.
counts1 = df1['UniqueIdentifier'].value_counts()
counts2 = df2['UniqueIdentifier'].value_counts()
dups = list(counts1[counts1 > 1].index) + list(counts2[counts2 > 1].index)

df1 = df1[~df1['UniqueIdentifier'].isin(dups)]
df2 = df2[~df2['UniqueIdentifier'].isin(dups)]

Find which unique identifiers appear in which datasets¶

In [21]:
df1_set = set(df1['UniqueIdentifier'])
df2_set = set(df2['UniqueIdentifier'])


def export_identifier_list(file_path, identifiers):
    """Write one identifier per line to `file_path`."""
    with open(file_path, 'w') as f:
        for item in identifiers:
            f.write("%s\n" % item)


# IDs only in df1.
in_df1_only = df1_set.difference(df2_set)
print("Of the " + str(len(df1_set)) + " unique identifiers in " + source_data_name1 + " " + str(len(in_df1_only)) + " do not appear in " + source_data_name2)

# FIX: the original wrapped the file-name concatenation in try/except, but
# every operand is a string so it cannot raise; the fallback names (one of
# which had a typo, "in in df2 only") were dead code and have been removed.
export_identifier_list(
    output_folder_UI + "/Unique identifiers in " + source_data_name1 + " only (" + str(len(in_df1_only)) + " values).txt",
    in_df1_only,
)

# IDs only in df2 (the original comment here wrongly said df1).
in_df2_only = df2_set.difference(df1_set)
print("Of the " + str(len(df2_set)) + " unique identifiers in " + source_data_name2 + " " + str(len(in_df2_only)) + " do not appear in " + source_data_name1)

export_identifier_list(
    output_folder_UI + "/Unique identifiers in " + source_data_name2 + " only (" + str(len(in_df2_only)) + " values).txt",
    in_df2_only,
)
Of the 200 unique identifiers in cloud 3 do not appear in prem
Of the 201 unique identifiers in prem 4 do not appear in cloud

Generate code for comparisons¶

In [22]:
# Suffix every column with its source name so both sides can coexist in one
# merged frame, then build one np.where comparison statement per shared
# column (a later cell exec()s these to add the __IsDifferent flag columns).
df1 = df1.add_suffix('_' + source_data_name1)
df2 = df2.add_suffix('_' + source_data_name2)

comparison_code = [
    f"df_merged['{item}__IsDifferent'] = np.where((df_merged['{item}_{source_data_name1}'] == df_merged['{item}_{source_data_name2}']), 0, 1)"
    for item in column_names_in_both
]

comparison_code
Out[22]:
["df_merged['IsRenewal__IsDifferent'] = np.where((df_merged['IsRenewal_cloud'] == df_merged['IsRenewal_prem']), 0, 1)",
 "df_merged['PolicyId__IsDifferent'] = np.where((df_merged['PolicyId_cloud'] == df_merged['PolicyId_prem']), 0, 1)",
 "df_merged['TransactionDate__IsDifferent'] = np.where((df_merged['TransactionDate_cloud'] == df_merged['TransactionDate_prem']), 0, 1)",
 "df_merged['Amount__IsDifferent'] = np.where((df_merged['Amount_cloud'] == df_merged['Amount_prem']), 0, 1)",
 "df_merged['TransactionId__IsDifferent'] = np.where((df_merged['TransactionId_cloud'] == df_merged['TransactionId_prem']), 0, 1)",
 "df_merged['Broker__IsDifferent'] = np.where((df_merged['Broker_cloud'] == df_merged['Broker_prem']), 0, 1)",
 "df_merged['Currency__IsDifferent'] = np.where((df_merged['Currency_cloud'] == df_merged['Currency_prem']), 0, 1)"]

Inner join dataframes¶

In [23]:
# Visual spot-check of df1 after suffixing (first two rows).
df1.head(2)
Out[23]:
TransactionId_cloud Amount_cloud Currency_cloud PolicyId_cloud TransactionDate_cloud IsRenewal_cloud Broker_cloud UniqueIdentifier_cloud
0 8028676 47259 gbp 1621a 09/02/2023 1 james smith 8028676
1 1488580 48114 usd 1611a 09/02/2023 1 tom jarvis 1488580
In [24]:
# Visual spot-check of df2 after suffixing (first two rows).
df2.head(2)
Out[24]:
TransactionId_prem Amount_prem Currency_prem PolicyId_prem TransactionDate_prem IsRenewal_prem Broker_prem UniqueIdentifier_prem
0 8028676 47259 usd 1621a 09/02/2023 1 james smith 8028676
1 1488580 48114 usd 1611a 09/02/2023 1 tom jarvis 1488580
In [25]:
# Recreate a plain 'UniqueIdentifier' column on each frame (add_suffix renamed
# the original one), then inner-join on it and drop the suffixed key copies.
UniqueIdentifier_col = next((col for col in df1.columns if 'UniqueIdentifier' in col), None)
if UniqueIdentifier_col is None:
    # FIX: the original fell through to df1[None], raising an opaque KeyError.
    raise ValueError("df1 has no UniqueIdentifier column to join on")
df1['UniqueIdentifier'] = df1[UniqueIdentifier_col]

UniqueIdentifier_col = next((col for col in df2.columns if 'UniqueIdentifier' in col), None)
if UniqueIdentifier_col is None:
    raise ValueError("df2 has no UniqueIdentifier column to join on")
df2['UniqueIdentifier'] = df2[UniqueIdentifier_col]

df_merged = pd.merge(df1, df2, how="inner", on=["UniqueIdentifier"])
df_merged = df_merged.loc[:, ~df_merged.columns.str.startswith('UniqueIdentifier_')]

Add comparison columns¶

In [26]:
# Add one 0/1 flag column per shared source column.
# FIX: the original exec()'d generated code strings from an earlier cell;
# building the same columns directly is equivalent, debuggable, and avoids
# exec entirely.
for item in column_names_in_both:
    left = item + '_' + source_data_name1
    right = item + '_' + source_data_name2
    df_merged[item + '__IsDifferent'] = np.where(df_merged[left] == df_merged[right], 0, 1)

# Alphabetical order groups each column's _cloud/_prem/__IsDifferent trio.
df_merged = df_merged.sort_index(axis=1)

df_rows = df_merged.shape[0]  # used for % of total discrepancies later

df_merged.head()
Out[26]:
Amount__IsDifferent Amount_cloud Amount_prem Broker__IsDifferent Broker_cloud Broker_prem Currency__IsDifferent Currency_cloud Currency_prem IsRenewal__IsDifferent IsRenewal_cloud IsRenewal_prem PolicyId__IsDifferent PolicyId_cloud PolicyId_prem TransactionDate__IsDifferent TransactionDate_cloud TransactionDate_prem TransactionId__IsDifferent TransactionId_cloud TransactionId_prem UniqueIdentifier
0 0 47259 47259 0 james smith james smith 1 gbp usd 0 1 1 0 1621a 1621a 0 09/02/2023 09/02/2023 0 8028676 8028676 8028676
1 0 48114 48114 0 tom jarvis tom jarvis 0 usd usd 0 1 1 0 1611a 1611a 0 09/02/2023 09/02/2023 0 1488580 1488580 1488580
2 0 19018 19018 0 martin jones martin jones 0 usd usd 0 0 0 0 1604a 1604a 0 10/02/2023 10/02/2023 0 3095155 3095155 3095155
3 0 28290 28290 0 martin jones martin jones 1 gbp usd 0 1 1 1 1607b 1607x 0 10/02/2023 10/02/2023 0 1263001 1263001 1263001
4 0 34128 34128 0 tom jarvis tom jarvis 1 gbp usd 0 0 0 0 1623a 1623a 0 09/02/2023 09/02/2023 0 2514622 2514622 2514622

Remove rows where all columns match, rearrange columns¶

In [27]:
# Count rows that do not match and add this as a column
df_merged['NumberOfDiscrepancies'] = df_merged.filter(regex='IsDifferent').eq(1).sum(axis=1)

# Make above column the first (leftmost) column
first_column = df_merged.pop('NumberOfDiscrepancies')
df_merged.insert(0, 'NumberOfDiscrepancies', first_column)

# Make the unique column the first (leftmost) column
first_column = df_merged.pop('UniqueIdentifier')
df_merged.insert(0, 'UniqueIdentifier', first_column)

# Remove rows with no differences
df_merged_differences_only = df_merged[df_merged.NumberOfDiscrepancies>0]
df_merged_differences_only = df_merged_differences_only.sort_values(by=['NumberOfDiscrepancies'], ascending=False)

df_merged_differences_only.head(3)
Out[27]:
UniqueIdentifier NumberOfDiscrepancies Amount__IsDifferent Amount_cloud Amount_prem Broker__IsDifferent Broker_cloud Broker_prem Currency__IsDifferent Currency_cloud Currency_prem IsRenewal__IsDifferent IsRenewal_cloud IsRenewal_prem PolicyId__IsDifferent PolicyId_cloud PolicyId_prem TransactionDate__IsDifferent TransactionDate_cloud TransactionDate_prem TransactionId__IsDifferent TransactionId_cloud TransactionId_prem
134 8227058 3 0 4625 4625 0 tom jarvis tom jarvis 1 gbp usd 0 1 1 1 1625b 1625x 1 11/02/2023 09/02/2023 0 8227058 8227058
90 2754026 3 0 28041 28041 1 martin jones james smith 1 gbp usd 0 1 1 1 1622b 1622x 0 10/02/2023 10/02/2023 0 2754026 2754026
91 6119692 3 0 24656 24656 1 tom jarvis james smith 1 gbp usd 0 0 0 1 1604b 1604x 0 11/02/2023 11/02/2023 0 6119692 6119692
In [28]:
# Report how many joined records contain at least one discrepancy.
n_rows, n_cols = df_merged_differences_only.shape
print(f"df has {n_rows} rows (this many records contain at least 1 difference) and {n_cols} columns")
df has 127 rows (this many records contain at least 1 difference) and 23 columns

Export main comparison output file¶

In [29]:
# Belt-and-braces: drop any stray unnamed index columns before export.
df_merged_differences_only = df_merged_differences_only.loc[:, ~df_merged_differences_only.columns.str.startswith('Unnamed: ')]

# Name the file after the null-handling mode.  FIX: the original wrapped this
# in try/except with a fallback name, but testing a boolean cannot raise, so
# the fallback was dead code.
null_tag = "null fixed" if replace_null_like_values_with_0 else "null unfixed"
output_file = output_folder + "/" + source_data_name1 + " vs " + source_data_name2 + " differences (" + null_tag + ") - " + str_current_datetime + ".csv"

df_merged_differences_only.to_csv(output_file, index=False)

Create and export summary output file¶

In [30]:
# Filter df to columns that contain __IsDifferent in column name
df_summary = df_merged_differences_only.loc[:, df_merged_differences_only.columns.str.endswith('__IsDifferent')]

# Sum every column
df_summary = pd.DataFrame(df_summary.sum()).T

# Transpose df
df_summary = df_summary.T

# Rename columns
df_summary = df_summary.reset_index()
df_summary.columns.values[0] = "Column"
df_summary.columns.values[1] = "Number of Discrepancies"
df_summary['Column'] = df_summary['Column'].str.replace('__IsDifferent','')

# Calculate percentage of rows where the columns show discrepencies
df_summary['Percent Of Total'] = round(df_summary['Number of Discrepancies']/df_rows*100,2)

# Sort df
df_summary= df_summary.sort_values(by=['Number of Discrepancies'], ascending=False)

# Output to CSV
try:
    if replace_null_like_values_with_0:
        output_file =  output_folder + "/" + source_data_name1 + " vs " + source_data_name2 + " summary (null fixed) - " + str_current_datetime + ".csv"
    else:
        output_file =  output_folder + "/" + source_data_name1 + " vs " + source_data_name2 + " summary (null unfixed) - " + str_current_datetime + ".csv"
except:
    output_file =  output_folder + "/Data summary.csv"
    
df_summary.to_csv(output_file, index=False)
In [31]:
# Display the per-column discrepancy summary.
df_summary
Out[31]:
Column Number of Discrepancies Percent Of Total
4 PolicyId 79 40.10
2 Currency 73 37.06
5 TransactionDate 12 6.09
1 Broker 9 4.57
3 IsRenewal 3 1.52
0 Amount 0 0.00
6 TransactionId 0 0.00

Export chart¶

In [32]:
# Bar chart of per-column difference percentages, saved next to the CSVs.
# FIX: removed the original's unused `df_summary_size` variable.
plot = df_summary.plot.bar(x='Column',
                    y='Percent Of Total',
                    title='Percentage of values that differ by column ({0} rows compared)'.format(df_rows),
                    xlabel="Column name (as appears in both data sources)",
                    ylabel="% difference",
                    figsize=(13,5),
                    rot=90
                   );

# Headroom above 100% so the rotated value labels never clip at the top.
plot.set_ylim(0, 119.75)

for i, v in enumerate(df_summary['Percent Of Total']):
    label = '{0:.2f}%'.format(v)

    # Colour by the *displayed* (2 d.p.) value so label and colour always agree.
    if float(label.strip('%')) == 0.00:
        color = 'green'
    else:
        color = 'red'

    plot.text(i, v + 2, label, ha='center', rotation=90, color=color)

plot.figure.savefig(output_folder + "/Summary image - " + str_current_datetime + ".png", bbox_inches='tight')

Export SQL and VBA text files¶

In [33]:
# VBA macro exported for the analyst: applies conditional formatting that
# turns __IsDifferent cells red when they equal 1.
vba_to_run = r'''
Sub ApplyConditionalFormatting()
    Dim cell As Range
    Dim lastColumn As Long

    lastColumn = Cells(1, Columns.Count).End(xlToLeft).Column

    For Each cell In Range("A1").Resize(1, lastColumn)
        If InStr(cell.Value2, "__IsDifferent") > 0 Then
            With Range(cell.Offset(1), Cells(Rows.Count, cell.Column).End(xlUp))
                .FormatConditions.Add Type:=xlCellValue, Operator:=xlEqual, Formula1:="=1"
                .FormatConditions(.FormatConditions.Count).SetFirstPriority
                With .FormatConditions(1).Interior
                    .PatternColorIndex = xlAutomatic
                    .Color = 255
                    .TintAndShade = 0
                End With
            End With
        End If
    Next cell
End Sub
'''

# Best-effort SQL export: sql1/sql2 only exist when the data came from the
# SQL cells above.  FIX: narrowed the original bare `except: pass` so only
# the expected failures are swallowed (NameError: queries undefined because
# the data was loaded another way; OSError: file could not be written).
try:
    sql_file = output_folder + "/SQL for data.txt"
    with open(sql_file, "w") as f:
        f.write(sql1 + "\n")
        f.write(sql2)
except (NameError, OSError):
    pass

vba_file = output_folder + "/VBA to highlight differences.txt"
with open(vba_file, "w") as f:
    f.write("'Run the below VBA to highlight cells in the comparison.csv....\n\n" + vba_to_run)

Copy this notebook into output folder (for audit purposes)¶

In [34]:
# Snapshot this notebook into the output folder for audit purposes.
# NOTE(review): the source name 'Data Comparison.ipynb' is hardcoded — confirm
# it still matches if the notebook is ever renamed.
shutil.copyfile('Data Comparison.ipynb', output_folder + '/Analysis file.ipynb')
Out[34]:
'Outputs/cloud vs prem (2024-02-10)/Analysis file.ipynb'

Print list of outputs and VBA message¶

In [35]:
# Windows-only pop-up telling the analyst the VBA macro is on the clipboard.
# FIX: corrected typos in the user-facing message ("that marks differences
# (discrepenices)" -> "that mark differences (discrepancies)").
# 0x00001000 = MB_SYSTEMMODAL (keep the dialog on top).
MessageBox(0, "Some VBA code is now attached to your clipboard (which can be pasted). This code will also be exported to a .txt file. Running this code in the differences.csv spreadsheet will colour cells that mark differences (discrepancies) red.", "Complete", 0x00001000)

print(vba_to_run)
pyperclip.copy(vba_to_run)
Sub ApplyConditionalFormatting()
    Dim cell As Range
    Dim lastColumn As Long

    lastColumn = Cells(1, Columns.Count).End(xlToLeft).Column

    For Each cell In Range("A1").Resize(1, lastColumn)
        If InStr(cell.Value2, "__IsDifferent") > 0 Then
            With Range(cell.Offset(1), Cells(Rows.Count, cell.Column).End(xlUp))
                .FormatConditions.Add Type:=xlCellValue, Operator:=xlEqual, Formula1:="=1"
                .FormatConditions(.FormatConditions.Count).SetFirstPriority
                With .FormatConditions(1).Interior
                    .PatternColorIndex = xlAutomatic
                    .Color = 255
                    .TintAndShade = 0
                End With
            End With
        End If
    Next cell
End Sub

In [36]:
# List what this run produced, files first then sub-folders.  A '.' in the
# entry name is the file-vs-folder heuristic (matching the original's logic).
output_folder_contents = os.listdir(output_folder)

print("These FILES were created in the '" + output_folder + "' folder...\n")
for entry in output_folder_contents:
    if '.' in entry:
        print(entry)

print("\n\nThese FOLDERS were created in the '" + output_folder + "' folder...\n")
for entry in output_folder_contents:
    if '.' not in entry:
        print(entry)
These FILES were created in the 'Outputs/cloud vs prem (2024-02-10)' folder...

Analysis file.ipynb
cloud vs prem differences (null fixed) - 2024-02-10.csv
cloud vs prem summary (null fixed) - 2024-02-10.csv
Row count cloud vs prem.txt
SQL for data.txt
Summary image - 2024-02-10.png
VBA to highlight differences.txt


These FOLDERS were created in the 'Outputs/cloud vs prem (2024-02-10)' folder...

Column name comparison
Unique identifiers missing in each
In [ ]: