# openring/parse_feeds_from_xml.py

# 30 lines
# 869 B
# Python

import xml.etree.ElementTree as ET
# Path to the XML file on the filesystem
file_path = 'feeds.xml'
# Path to the text file to store xmlUrls
output_file_path = 'feeds.txt'
# Feeds whose URL contains this host are filtered out, as these blogs do not
# support RSS natively
EXCLUDED_HOST = "kill-the-newsletter.com"


def collect_xml_urls(root):
    """Return the ``xmlUrl`` values of all ``outline`` elements below *root*.

    Args:
        root: Parsed XML element whose descendants are searched.

    Returns:
        A list of URL strings in document order. Outlines that have no
        ``xmlUrl`` attribute (or an empty one) are skipped, as are URLs
        containing ``EXCLUDED_HOST``.
    """
    urls = []
    for outline in root.findall('.//outline'):
        # Look the attribute up once; a missing attribute yields None and an
        # empty one yields '', both of which are skipped.
        url = outline.get('xmlUrl')
        if url and EXCLUDED_HOST not in url:
            urls.append(url)
    return urls


def main():
    """Read ``file_path``, collect the feed URLs, write ``output_file_path``.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
    """
    # Let the parser open the file itself so the encoding declared in the XML
    # prolog is honoured instead of the platform's default text encoding.
    root = ET.parse(file_path).getroot()
    xml_urls = collect_xml_urls(root)
    # Write xmlUrls to the text file, one per line
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.write('\n'.join(xml_urls))
    print("XML URLs have been written to the output file:", output_file_path)


if __name__ == "__main__":
    main()