summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- backend/src/feed_parser.rs 249
1 files changed, 128 insertions, 121 deletions
diff --git a/backend/src/feed_parser.rs b/backend/src/feed_parser.rs
index 43f3439..8822f31 100644
--- a/backend/src/feed_parser.rs
+++ b/backend/src/feed_parser.rs
@@ -38,6 +38,9 @@ fn sanitize_html(input: &str) -> String {
builder.clean(input).to_string()
}
+const ATOM_NS: &str = "http://www.w3.org/2005/Atom";
+const DC_NS: &str = "http://purl.org/dc/elements/1.1/";
+
impl Feed {
pub fn parse_rss(rss: &str) -> Result<Feed, String> {
let doc = Document::parse(rss).map_err(|e| e.to_string())?;
@@ -56,77 +59,79 @@ impl Feed {
let mut items = Vec::new();
for child in channel.children() {
- if child.is_element() {
- match child.tag_name().name() {
- "title" => title = child.text().map(str::trim).map(str::to_string),
- "description" => description = child.text().map(str::trim).map(str::to_string),
- "link" => link = child.text().map(str::trim).map(str::to_string),
- "item" => {
- let mut guid = None;
- let mut title = None;
- let mut description = None;
- let mut link = None;
- let mut author = None;
- let mut published_at = None;
- for child in child.children() {
- match child.tag_name().name() {
- "guid" => {
- if let Some(text) = child.text().map(str::trim) {
- guid = Some(text.to_string());
- if link.is_none() {
- match child.attribute("isPermaLink") {
- None | Some("true") => {
- link = Some(text.to_string())
- }
- _ => {}
- }
+ if !child.is_element() || child.tag_name().namespace().is_some() {
+ continue;
+ }
+ match child.tag_name().name() {
+ "title" => title = child.text().map(str::trim).map(str::to_string),
+ "description" => description = child.text().map(str::trim).map(str::to_string),
+ "link" => link = child.text().map(str::trim).map(str::to_string),
+ "item" => {
+ let mut guid = None;
+ let mut title = None;
+ let mut description = None;
+ let mut link = None;
+ let mut author = None;
+ let mut published_at = None;
+ for child in child.children() {
+ if !child.is_element() {
+ continue;
+ }
+ match (child.tag_name().namespace(), child.tag_name().name()) {
+ (None, "guid") => {
+ if let Some(text) = child.text().map(str::trim) {
+ guid = Some(text.to_string());
+ if link.is_none() {
+ match child.attribute("isPermaLink") {
+ None | Some("true") => link = Some(text.to_string()),
+ _ => {}
}
}
}
- "title" => title = child.text().map(str::trim).map(str::to_string),
- "description" => description = child.text().map(sanitize_html),
- "link" => link = child.text().map(str::trim).map(str::to_string),
- "author" => {
- // Fallback if `dc:creator` is not defined later.
- if author.is_none() {
- author = child.text().map(str::trim).map(str::to_string)
- }
- }
- "creator" => {
- if child.tag_name().namespace()
- == Some("http://purl.org/dc/elements/1.1/")
- {
- author = child.text().map(str::trim).map(str::to_string);
- }
- }
- "pubDate" => {
- published_at = child.text().map(str::trim).and_then(|text| {
- DateTime::parse_from_rfc2822(text)
- .map(|dt| dt.naive_utc())
- .ok()
- });
+ }
+ (None, "title") => {
+ title = child.text().map(str::trim).map(str::to_string)
+ }
+ (None, "description") => description = child.text().map(sanitize_html),
+ (None, "link") => {
+ link = child.text().map(str::trim).map(str::to_string)
+ }
+ (None, "author") => {
+ // Fallback if `dc:creator` is not defined later.
+ if author.is_none() {
+ author = child.text().map(str::trim).map(str::to_string)
}
- _ => {}
}
+ (Some(DC_NS), "creator") => {
+ author = child.text().map(str::trim).map(str::to_string);
+ }
+ (None, "pubDate") => {
+ published_at = child.text().map(str::trim).and_then(|text| {
+ DateTime::parse_from_rfc2822(text)
+ .map(|dt| dt.naive_utc())
+ .ok()
+ });
+ }
+ _ => {}
}
- items.push(Item {
- guid: guid
- .or_else(|| {
- link.as_ref()
- .or_else(|| title.as_ref())
- .or_else(|| description.as_ref())
- .map(String::clone)
- })
- .ok_or_else(|| "item element without identifier")?,
- title,
- description,
- link,
- author,
- published_at,
- });
}
- _ => {}
+ items.push(Item {
+ guid: guid
+ .or_else(|| {
+ link.as_ref()
+ .or_else(|| title.as_ref())
+ .or_else(|| description.as_ref())
+ .map(String::clone)
+ })
+ .ok_or_else(|| "item element without identifier")?,
+ title,
+ description,
+ link,
+ author,
+ published_at,
+ });
}
+ _ => {}
}
}
@@ -151,72 +156,74 @@ impl Feed {
let mut items = Vec::new();
for child in feed.children() {
- if child.is_element() {
- match child.tag_name().name() {
- "title" => title = child.text().map(str::trim).map(str::to_string),
- "subtitle" => description = child.text().map(str::trim).map(str::to_string),
- "link" => match child.attribute("rel") {
- Some("alternate") | None => {
- link = child.attribute("href").map(str::to_string)
+ if !child.is_element() || child.tag_name().namespace() != Some(ATOM_NS) {
+ continue;
+ }
+ match child.tag_name().name() {
+ "title" => title = child.text().map(str::trim).map(str::to_string),
+ "subtitle" => description = child.text().map(str::trim).map(str::to_string),
+ "link" => match child.attribute("rel") {
+ Some("alternate") | None => link = child.attribute("href").map(str::to_string),
+ _ => {}
+ },
+ "entry" => {
+ let mut guid = None;
+ let mut title = None;
+ let mut description = None;
+ let mut link = None;
+ let mut author = None;
+ let mut published_at = None;
+ for child in child.children() {
+ if !child.is_element() || child.tag_name().namespace() != Some(ATOM_NS) {
+ continue;
}
- _ => {}
- },
- "entry" => {
- let mut guid = None;
- let mut title = None;
- let mut description = None;
- let mut link = None;
- let mut author = None;
- let mut published_at = None;
- for child in child.children() {
- match child.tag_name().name() {
- "id" => guid = child.text().map(str::trim).map(str::to_string),
- "title" => title = child.text().map(str::trim).map(str::to_string),
- "content" => description = child.text().map(sanitize_html),
- "link" => match child.attribute("rel") {
- Some("alternate") | None => {
- link = child.attribute("href").map(str::to_string)
- }
- _ => {}
- },
- "author" => {
- for child in child.children() {
- match child.tag_name().name() {
- "name" => {
- author =
- child.text().map(str::trim).map(str::to_string);
- break;
- }
- "email" => {
- author =
- child.text().map(str::trim).map(str::to_string);
- // Fallback if `name` is not defined later.
- }
- _ => {}
+ match child.tag_name().name() {
+ "id" => guid = child.text().map(str::trim).map(str::to_string),
+ "title" => title = child.text().map(str::trim).map(str::to_string),
+ "content" => description = child.text().map(sanitize_html),
+ "link" => match child.attribute("rel") {
+ Some("alternate") | None => {
+ link = child.attribute("href").map(str::to_string)
+ }
+ _ => {}
+ },
+ "author" => {
+ for child in child.children() {
+ match child.tag_name().name() {
+ "name" => {
+ author =
+ child.text().map(str::trim).map(str::to_string);
+ break;
}
+ "email" => {
+ author =
+ child.text().map(str::trim).map(str::to_string);
+ // Fallback if `name` is not defined later.
+ }
+ _ => {}
}
}
- "updated" => {
- published_at = child.text().map(str::trim).and_then(|text| {
- DateTime::parse_from_rfc3339(text)
- .map(|dt| dt.naive_utc())
- .ok()
- });
- }
- _ => {}
}
+ "updated" => {
+ published_at = child.text().map(str::trim).and_then(|text| {
+ DateTime::parse_from_rfc3339(text)
+ .map(|dt| dt.naive_utc())
+ .ok()
+ });
+ }
+ _ => {}
}
- items.push(Item {
- guid: guid.ok_or_else(|| "entry element without identifier")?,
- title,
- description,
- link,
- author,
- published_at,
- });
}
- _ => {}
+ items.push(Item {
+ guid: guid.ok_or_else(|| "entry element without identifier")?,
+ title,
+ description,
+ link,
+ author,
+ published_at,
+ });
}
+ _ => {}
}
}