Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Correct indentation, so that CodeQL can work with the code #11166

Merged
merged 1 commit into from
Dec 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 57 additions & 53 deletions packager-codes/get_packager_code_from_html_ireland.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,114 +8,118 @@

# In[]:

import pandas as pd
urls = ['https://oapi.fsai.ie/LAApprovedEstablishments.aspx',
'https://oapi.fsai.ie/AuthReg99901Establishments.aspx',
'https://oapi.fsai.ie/HSEApprovedEstablishments.aspx'
]
]
urls_second_format = ['https://www.sfpa.ie/Seafood-Safety/Registration-Approval-of-Businesses/List-of-Approved-Establishments-and-Vessels/Approved-Establishments',
'https://www.sfpa.ie/Seafood-Safety/Registration-Approval-of-Businesses/Approved-Freezer-Vessels'
]
]

csv_file = 'Ireland_concatenated.csv'

import pandas as pd
pages = [pd.read_html(url) for url in urls]
pages2= [pd.read_html(url) for url in urls_second_format]
pages2 = [pd.read_html(url) for url in urls_second_format]


# In[]:

def ireland_correction_of_1_dataframe(df): #Version to get anything
#print ("df as recuperated :")
#print(df.head())
def ireland_correction_of_1_dataframe(df): # Version to get anything
# print ("df as recuperated :")
# print(df.head())
df.columns = df.iloc[[0]].values.tolist()
df = df.rename(columns={' Address': 'Address'})
df=df.drop(df.index[0]) #
df = df.drop(df.index[0])
row_reference = df.iloc[0]

if 'Approval_Number' not in df.columns:
print("this table has no approval number and was not added")
return pd.DataFrame()

df_is_null=df.isnull()
for i in range(1,len(df)): #len(df)
if df_is_null.iloc[i,len(df.columns)-1]: #We assume that on a row, there is no merged cell(null in pandas) on the webpage after an unmerged cell (not null)
row_retrieved=[]
df_is_null = df.isnull()
for i in range(1, len(df)): # len(df)
# We assume that on a row, there is no merged cell(null in pandas) on the webpage after an unmerged cell (not null)
if df_is_null.iloc[i, len(df.columns)-1]:
row_retrieved = []
value = ""
j=0
while not df_is_null.iloc[i,j]:
value=df.iloc[i,j]
j = 0
while not df_is_null.iloc[i, j]:
value = df.iloc[i, j]
row_retrieved.append(value)
#print("while loop - j:"+str(j)+ "value : "+str(value))
j+=1
# print("while loop - j:"+str(j)+ "value : "+str(value))
j += 1
row = row_reference.copy()
row[len(row)-len(row_retrieved):len(row)]=row_retrieved
df.iloc[i]= row

row_reference =df.iloc[i]
row[len(row)-len(row_retrieved):len(row)] = row_retrieved
df.iloc[i] = row

row_reference = df.iloc[i]

df["Address"]=df["Address"].apply(add_space_before_uppercase)
df["Address"] = df["Address"].apply(add_space_before_uppercase)

#print ("result corrected : ")
#print(df.head())
# print ("result corrected : ")
# print(df.head())
return df

#df=pages[0][18]
#ireland_correction_of_1_dataframe(df)
# df=pages[0][18]
# ireland_correction_of_1_dataframe(df)


# In[]:

def add_space_before_uppercase(words):
result=""
for s in words:
if isinstance(s, str):
if s.isupper():
result+=" "
result+=s
return result
result = ""
for s in words:
if isinstance(s, str):
if s.isupper():
result += " "
result += s
return result


""" This could have been done more efficienty using Regex r"[a-z][A-Z]"" and avoid r" [A-Z]". But google maps recognize it this way."""


# In[ ]:

df=pd.DataFrame()
df = pd.DataFrame()


# In[]:

i=0
i = 0
for page in pages:
j=0
j = 0
for table in page:
df=df.append(ireland_correction_of_1_dataframe(table), ignore_index=True)
#print ("table "+str(j)+" is ok")
#j+=1
print ("page "+str(i)+" is done")
i+=1
df = df.append(ireland_correction_of_1_dataframe(
table), ignore_index=True)
# print ("table "+str(j)+" is ok")
# j+=1
print("page "+str(i)+" is done")
i += 1
print("finished for all in urls!")


# In[]:

i=0
i = 0
for page2 in pages2:
j=0
j = 0
for table in page2:
#print (table.head(3))
table=table.drop(table.index[0])
table.loc[0,0]='Approval_Number'
#print (ireland_correction_of_1_dataframe(table).head())
df=df.append(ireland_correction_of_1_dataframe(table), ignore_index=True)
print ("table "+str(j)+" is ok")
j+=1
print ("page "+str(i)+" is done")
i+=1
# print (table.head(3))
table = table.drop(table.index[0])
table.loc[0, 0] = 'Approval_Number'
# print (ireland_correction_of_1_dataframe(table).head())
df = df.append(ireland_correction_of_1_dataframe(
table), ignore_index=True)
print("table "+str(j)+" is ok")
j += 1
print("page "+str(i)+" is done")
i += 1
print("finished for table in urls_second_format!")


# In[]:


df.to_csv(csv_file, index = False)
df.to_csv(csv_file, index=False)
18 changes: 11 additions & 7 deletions scripts/generate_dump_for_offline_apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import pandas


def main():
if not (os.getenv('OFF_PUBLIC_DATA_DIR') and os.getenv('PRODUCT_OPENER_FLAVOR') and os.getenv('PRODUCT_OPENER_FLAVOR_SHORT')):
print("Environment variables OFF_PUBLIC_DATA_DIR, PRODUCT_OPENER_FLAVOR and PRODUCT_OPENER_FLAVOR_SHORT are required")
Expand All @@ -13,15 +14,18 @@ def main():

if not os.path.exists(off_public_data_dir + '/offline'):
os.makedirs(off_public_data_dir + '/offline')

df = pandas.read_csv(off_public_data_dir + '/en.' + product_opener_flavor + '.org.products.csv', sep='\t', low_memory=False)
colnames = ['code','product_name','quantity','brands']

df = pandas.read_csv(off_public_data_dir + '/en.' + product_opener_flavor +
'.org.products.csv', sep='\t', low_memory=False)
colnames = ['code', 'product_name', 'quantity', 'brands']
# add 'nutriscore_grade','nova_group','environmental_score_grade' columns if the flavor is off
if product_opener_flavor_short == 'off':
colnames = colnames + ['nutriscore_grade','nova_group','environmental_score_grade']
colnames = colnames + ['nutriscore_grade',
'nova_group', 'environmental_score_grade']

df.rename(columns={'nutriscore_grade': 'nutrition_grade_fr'}).to_csv(off_public_data_dir + '/offline/en.' +
product_opener_flavor + '.org.products.small.csv', columns=colnames, sep='\t', index=False)


df.rename(columns={'nutriscore_grade': 'nutrition_grade_fr'}).to_csv(off_public_data_dir + '/offline/en.' + product_opener_flavor + '.org.products.small.csv', columns = colnames,sep='\t',index=False)

if __name__ == '__main__':
main()

96 changes: 51 additions & 45 deletions scripts/mappingGES.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,75 +14,81 @@

temporary_exists = os.path.isfile(PATH_TO_TEMPORARY)
if temporary_exists:
print "The temporary file already exists"
exit()
print "The temporary file already exists"
exit()

ingredients_exists = os.path.isfile(PATH_TO_INGREDIENTS)
if not ingredients_exists:
print "The ingredient file does not exist, check the path :" + PATH_TO_INGREDIENTS
exit()
print "The ingredient file does not exist, check the path :" + PATH_TO_INGREDIENTS
exit()

foodGES_exists = os.path.isfile(PATH_TO_FOODGES)
if not foodGES_exists:
print "The foodGES file does not exist, check the path :" + PATH_TO_FOODGES
exit()
print "The foodGES file does not exist, check the path :" + PATH_TO_FOODGES
exit()


def check_next_lines(ingredients):
next_line_is_not_foodges = True
keep_lines = []
while next_line_is_not_foodges:
next_line = ingredients.readline()
keep_lines.append(next_line)
if STRING_FOODGES_VALUE not in next_line and STRING_FOODGES_INGREDIENT not in next_line:
next_line_is_not_foodges = False
return keep_lines
next_line_is_not_foodges = True
keep_lines = []
while next_line_is_not_foodges:
next_line = ingredients.readline()
keep_lines.append(next_line)
if STRING_FOODGES_VALUE not in next_line and STRING_FOODGES_INGREDIENT not in next_line:
next_line_is_not_foodges = False
return keep_lines


def write_next_lines(next_lines, temporary_file):
size = len(next_lines)
for i in range(0, size-1):
line = next_lines[i]
if STRING_FOODGES_INGREDIENT in line:
temporary_file.write(line)
if line.rstrip("\n") not in dict:
print("this mapping is not known : " + line.rstrip("\n"))
else:
temporary_file.write(STRING_FOODGES_VALUE + dict.get(line.rstrip("\n")) + "\n")
if line.rstrip("\n") in unused_mappings:
unused_mappings.remove(line.rstrip("\n"))
temporary_file.write(next_lines[size-1])
size = len(next_lines)
for i in range(0, size-1):
line = next_lines[i]
if STRING_FOODGES_INGREDIENT in line:
temporary_file.write(line)
if line.rstrip("\n") not in dict:
print("this mapping is not known : " + line.rstrip("\n"))
else:
temporary_file.write(
STRING_FOODGES_VALUE + dict.get(line.rstrip("\n")) + "\n")
if line.rstrip("\n") in unused_mappings:
unused_mappings.remove(line.rstrip("\n"))
temporary_file.write(next_lines[size-1])


with open(PATH_TO_FOODGES, 'r') as csvFile:
reader = csv.reader(csvFile)
for row in reader:
dict[row[2]]=row[1]
unused_mappings.append(row[2])
reader = csv.reader(csvFile)
for row in reader:
dict[row[2]] = row[1]
unused_mappings.append(row[2])

csvFile.close()

temporary_file = open(PATH_TO_TEMPORARY,"w+")
temporary_file = open(PATH_TO_TEMPORARY, "w+")
ingredients = file(PATH_TO_INGREDIENTS)

while True:
line = ingredients.readline()
temporary_file.write(line)
if not line: break
if STRING_FOODGES_INGREDIENT in line:
if line.rstrip("\n") not in dict:
print("this mapping is not known : " + line.rstrip("\n"))
else:
temporary_file.write(STRING_FOODGES_VALUE + dict.get(line.rstrip("\n")) + "\n")
if line.rstrip("\n") in unused_mappings:
unused_mappings.remove(line.rstrip("\n"))
next_lines = check_next_lines(ingredients)
write_next_lines(next_lines, temporary_file)
line = ingredients.readline()
temporary_file.write(line)
if not line:
break
if STRING_FOODGES_INGREDIENT in line:
if line.rstrip("\n") not in dict:
print("this mapping is not known : " + line.rstrip("\n"))
else:
temporary_file.write(STRING_FOODGES_VALUE +
dict.get(line.rstrip("\n")) + "\n")
if line.rstrip("\n") in unused_mappings:
unused_mappings.remove(line.rstrip("\n"))
next_lines = check_next_lines(ingredients)
write_next_lines(next_lines, temporary_file)

ingredients.close()
temporary_file.close()
temporary_file.close()

os.remove(PATH_TO_INGREDIENTS)
os.rename(PATH_TO_TEMPORARY, PATH_TO_INGREDIENTS)

print("\n")
print "This is the list of unused mapping : "
for mapping in unused_mappings:
print mapping
print mapping
Loading
Loading