This commit is contained in:
Jonas_Jones 2023-01-24 11:13:46 +01:00
parent 40e3b6cac1
commit 3801ab2697
9 changed files with 1463 additions and 1418 deletions

View file

@ -1,100 +1,100 @@
'''EPR 07 Aufgabe 1'''
__author__ = "7987847, Werner, 7347119, Fajst, 7735965, Melikidze"
import re
# a) All character strings between < and the next >.
# The two < and > can also be output. Example: <a>, <span …>.
# b) All (annual) numbers between 1900 and 2099.
def extract_annual(text):
    '''
    Extract all annual numbers between 1900 and 2099.

    A year is exactly four ASCII digits starting with 19 or 20 that are
    not part of a longer run of digits, so '12019' or '20231' yield
    nothing while '(1991)' and 'year2000' yield '1991' and '2000'.

    text: the string to search.
    Return a list of strings in order of appearance (empty if none).
    '''
    # The lookbehind/lookahead reject candidates embedded in longer numbers.
    # The group is non-capturing so that findall() returns the whole year
    # instead of only the '19'/'20' prefix.
    return re.findall(r'(?<![0-9])(?:19|20)[0-9]{2}(?![0-9])', text)
def extract_tags(text):
    '''
    Collect every tag found in text.

    A tag is a '<', followed by one or more characters other than '>',
    followed by the closing '>'. The angle brackets are part of each
    returned string.

    text: the string to search.
    Return a list of strings in order of appearance (empty if none).
    '''
    tag_pattern = re.compile(r'<[^>]+>')
    return [found.group(0) for found in tag_pattern.finditer(text)]
def extract_strings(text):
    '''
    Collect the text that sits between a '>' and the next '<'.

    Only non-empty runs are reported, and the enclosing '>' and '<' are
    not part of the returned strings.

    text: the string to search.
    Return a list of strings in order of appearance (empty if none).
    '''
    between_pattern = re.compile(r'>([^<]+)<')
    return [found.group(1) for found in between_pattern.finditer(text)]
def main():
    '''
    Read 'PythonEntwicklungG.txt' and print every extracted item with a label.

    For each line of the file, in order, the tags, then the strings
    between '>' and '<', then the annual numbers are printed. Each item
    goes on its own output line, left-aligned in a 110-character column
    and followed by a label naming what it is.

    Returns nothing. open() raises OSError (for example
    FileNotFoundError) if the file cannot be opened.
    '''
    # The with-statement closes the file on exit; no explicit close() needed.
    with open('PythonEntwicklungG.txt', 'r') as file:
        data = file.readlines()

    for line in data:
        # Items are labelled by the extractor that produced them rather
        # than by attempting int(item): int() also accepts text such as
        # '42' or ' 7 ', which are not annual numbers.
        labelled_groups = (
            ('is a Tag string', extract_tags(line)),
            ('is a string', extract_strings(line)),
            ('is an annual number', extract_annual(line)),
        )
        for label, items in labelled_groups:
            for item in items:
                print(f'{item:<110} <--- {label}', end='\t\n')
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
'''EPR 07 Aufgabe 1'''
__author__ = "7987847, Werner, 7347119, Fajst, 7735965, Melikidze"
import re
# a) All character strings between < and the next >.
# The two < and > can also be output. Example: <a>, <span …>.
# b) All (annual) numbers between 1900 and 2099.
def extract_annual(text):
    '''
    Extract all annual numbers between 1900 and 2099.

    A year is exactly four ASCII digits starting with 19 or 20 that are
    not part of a longer run of digits, so '12019' or '20231' yield
    nothing while '(1991)' and 'year2000' yield '1991' and '2000'.

    text: the string to search.
    Return a list of strings in order of appearance (empty if none).
    '''
    # The lookbehind/lookahead reject candidates embedded in longer numbers.
    # The group is non-capturing so that findall() returns the whole year
    # instead of only the '19'/'20' prefix.
    return re.findall(r'(?<![0-9])(?:19|20)[0-9]{2}(?![0-9])', text)
def extract_tags(text):
    '''
    Collect every tag found in text.

    A tag is a '<', followed by one or more characters other than '>',
    followed by the closing '>'. The angle brackets are part of each
    returned string.

    text: the string to search.
    Return a list of strings in order of appearance (empty if none).
    '''
    tag_pattern = re.compile(r'<[^>]+>')
    return [found.group(0) for found in tag_pattern.finditer(text)]
def extract_strings(text):
    '''
    Collect the text that sits between a '>' and the next '<'.

    Only non-empty runs are reported, and the enclosing '>' and '<' are
    not part of the returned strings.

    text: the string to search.
    Return a list of strings in order of appearance (empty if none).
    '''
    between_pattern = re.compile(r'>([^<]+)<')
    return [found.group(1) for found in between_pattern.finditer(text)]
def main():
    '''
    Read 'PythonEntwicklungG.txt' and print every extracted item with a label.

    For each line of the file, in order, the tags, then the strings
    between '>' and '<', then the annual numbers are printed. Each item
    goes on its own output line, left-aligned in a 110-character column
    and followed by a label naming what it is.

    Returns nothing. open() raises OSError (for example
    FileNotFoundError) if the file cannot be opened.
    '''
    # The with-statement closes the file on exit; no explicit close() needed.
    with open('PythonEntwicklungG.txt', 'r') as file:
        data = file.readlines()

    for line in data:
        # Items are labelled by the extractor that produced them rather
        # than by attempting int(item): int() also accepts text such as
        # '42' or ' 7 ', which are not annual numbers.
        labelled_groups = (
            ('is a Tag string', extract_tags(line)),
            ('is a string', extract_strings(line)),
            ('is an annual number', extract_annual(line)),
        )
        for label, items in labelled_groups:
            for item in items:
                print(f'{item:<110} <--- {label}', end='\t\n')
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()