From 362c1fef908b991a750a1945468b2df9bab6ed17 Mon Sep 17 00:00:00 2001
From: Tuomas Siipola
Date: Tue, 7 Jan 2020 20:46:59 +0200
Subject: Check tag namespaces

---
 backend/src/feed_parser.rs | 249 +++++++++++++++++++++++----------------------
 1 file changed, 128 insertions(+), 121 deletions(-)

diff --git a/backend/src/feed_parser.rs b/backend/src/feed_parser.rs
index 43f3439..8822f31 100644
--- a/backend/src/feed_parser.rs
+++ b/backend/src/feed_parser.rs
@@ -38,6 +38,9 @@ fn sanitize_html(input: &str) -> String {
     builder.clean(input).to_string()
 }
 
+const ATOM_NS: &str = "http://www.w3.org/2005/Atom";
+const DC_NS: &str = "http://purl.org/dc/elements/1.1/";
+
 impl Feed {
     pub fn parse_rss(rss: &str) -> Result<Feed, String> {
         let doc = Document::parse(rss).map_err(|e| e.to_string())?;
@@ -56,77 +59,79 @@ impl Feed {
         let mut items = Vec::new();
 
         for child in channel.children() {
-            if child.is_element() {
-                match child.tag_name().name() {
-                    "title" => title = child.text().map(str::trim).map(str::to_string),
-                    "description" => description = child.text().map(str::trim).map(str::to_string),
-                    "link" => link = child.text().map(str::trim).map(str::to_string),
-                    "item" => {
-                        let mut guid = None;
-                        let mut title = None;
-                        let mut description = None;
-                        let mut link = None;
-                        let mut author = None;
-                        let mut published_at = None;
-                        for child in child.children() {
-                            match child.tag_name().name() {
-                                "guid" => {
-                                    if let Some(text) = child.text().map(str::trim) {
-                                        guid = Some(text.to_string());
-                                        if link.is_none() {
-                                            match child.attribute("isPermaLink") {
-                                                None | Some("true") => {
-                                                    link = Some(text.to_string())
-                                                }
-                                                _ => {}
-                                            }
+            if !child.is_element() || child.tag_name().namespace().is_some() {
+                continue;
+            }
+            match child.tag_name().name() {
+                "title" => title = child.text().map(str::trim).map(str::to_string),
+                "description" => description = child.text().map(str::trim).map(str::to_string),
+                "link" => link = child.text().map(str::trim).map(str::to_string),
+                "item" => {
+                    let mut guid = None;
+                    let mut title = None;
+                    let mut description = None;
+                    let mut link = None;
+                    let mut author = None;
+                    let mut published_at = None;
+                    for child in child.children() {
+                        if !child.is_element() {
+                            continue;
+                        }
+                        match (child.tag_name().namespace(), child.tag_name().name()) {
+                            (None, "guid") => {
+                                if let Some(text) = child.text().map(str::trim) {
+                                    guid = Some(text.to_string());
+                                    if link.is_none() {
+                                        match child.attribute("isPermaLink") {
+                                            None | Some("true") => link = Some(text.to_string()),
+                                            _ => {}
                                         }
                                     }
                                 }
-                                "title" => title = child.text().map(str::trim).map(str::to_string),
-                                "description" => description = child.text().map(sanitize_html),
-                                "link" => link = child.text().map(str::trim).map(str::to_string),
-                                "author" => {
-                                    // Fallback if `dc:creator` is not defined later.
-                                    if author.is_none() {
-                                        author = child.text().map(str::trim).map(str::to_string)
-                                    }
-                                }
-                                "creator" => {
-                                    if child.tag_name().namespace()
-                                        == Some("http://purl.org/dc/elements/1.1/")
-                                    {
-                                        author = child.text().map(str::trim).map(str::to_string);
-                                    }
-                                }
-                                "pubDate" => {
-                                    published_at = child.text().map(str::trim).and_then(|text| {
-                                        DateTime::parse_from_rfc2822(text)
-                                            .map(|dt| dt.naive_utc())
-                                            .ok()
-                                    });
+                            }
+                            (None, "title") => {
+                                title = child.text().map(str::trim).map(str::to_string)
+                            }
+                            (None, "description") => description = child.text().map(sanitize_html),
+                            (None, "link") => {
+                                link = child.text().map(str::trim).map(str::to_string)
+                            }
+                            (None, "author") => {
+                                // Fallback if `dc:creator` is not defined later.
+                                if author.is_none() {
+                                    author = child.text().map(str::trim).map(str::to_string)
                                 }
-                                _ => {}
                             }
+                            (Some(DC_NS), "creator") => {
+                                author = child.text().map(str::trim).map(str::to_string);
+                            }
+                            (None, "pubDate") => {
+                                published_at = child.text().map(str::trim).and_then(|text| {
+                                    DateTime::parse_from_rfc2822(text)
+                                        .map(|dt| dt.naive_utc())
+                                        .ok()
+                                });
+                            }
+                            _ => {}
                         }
-                        items.push(Item {
-                            guid: guid
-                                .or_else(|| {
-                                    link.as_ref()
-                                        .or_else(|| title.as_ref())
-                                        .or_else(|| description.as_ref())
-                                        .map(String::clone)
-                                })
-                                .ok_or_else(|| "item element without identifier")?,
-                            title,
-                            description,
-                            link,
-                            author,
-                            published_at,
-                        });
                     }
-                    _ => {}
+                    items.push(Item {
+                        guid: guid
+                            .or_else(|| {
+                                link.as_ref()
+                                    .or_else(|| title.as_ref())
+                                    .or_else(|| description.as_ref())
+                                    .map(String::clone)
+                            })
+                            .ok_or_else(|| "item element without identifier")?,
+                        title,
+                        description,
+                        link,
+                        author,
+                        published_at,
+                    });
                 }
+                _ => {}
             }
         }
 
@@ -151,72 +156,74 @@ impl Feed {
         let mut items = Vec::new();
 
         for child in feed.children() {
-            if child.is_element() {
-                match child.tag_name().name() {
-                    "title" => title = child.text().map(str::trim).map(str::to_string),
-                    "subtitle" => description = child.text().map(str::trim).map(str::to_string),
-                    "link" => match child.attribute("rel") {
-                        Some("alternate") | None => {
-                            link = child.attribute("href").map(str::to_string)
+            if !child.is_element() || child.tag_name().namespace() != Some(ATOM_NS) {
+                continue;
+            }
+            match child.tag_name().name() {
+                "title" => title = child.text().map(str::trim).map(str::to_string),
+                "subtitle" => description = child.text().map(str::trim).map(str::to_string),
+                "link" => match child.attribute("rel") {
+                    Some("alternate") | None => link = child.attribute("href").map(str::to_string),
+                    _ => {}
+                },
+                "entry" => {
+                    let mut guid = None;
+                    let mut title = None;
+                    let mut description = None;
+                    let mut link = None;
+                    let mut author = None;
+                    let mut published_at = None;
+                    for child in child.children() {
+                        if !child.is_element() || child.tag_name().namespace() != Some(ATOM_NS) {
+                            continue;
                         }
-                        _ => {}
-                    },
-                    "entry" => {
-                        let mut guid = None;
-                        let mut title = None;
-                        let mut description = None;
-                        let mut link = None;
-                        let mut author = None;
-                        let mut published_at = None;
-                        for child in child.children() {
-                            match child.tag_name().name() {
-                                "id" => guid = child.text().map(str::trim).map(str::to_string),
-                                "title" => title = child.text().map(str::trim).map(str::to_string),
-                                "content" => description = child.text().map(sanitize_html),
-                                "link" => match child.attribute("rel") {
-                                    Some("alternate") | None => {
-                                        link = child.attribute("href").map(str::to_string)
-                                    }
-                                    _ => {}
-                                },
-                                "author" => {
-                                    for child in child.children() {
-                                        match child.tag_name().name() {
-                                            "name" => {
-                                                author =
-                                                    child.text().map(str::trim).map(str::to_string);
-                                                break;
-                                            }
-                                            "email" => {
-                                                author =
-                                                    child.text().map(str::trim).map(str::to_string);
-                                                // Fallback if `name` is not defined later.
-                                            }
-                                            _ => {}
+                        match child.tag_name().name() {
+                            "id" => guid = child.text().map(str::trim).map(str::to_string),
+                            "title" => title = child.text().map(str::trim).map(str::to_string),
+                            "content" => description = child.text().map(sanitize_html),
+                            "link" => match child.attribute("rel") {
+                                Some("alternate") | None => {
+                                    link = child.attribute("href").map(str::to_string)
+                                }
+                                _ => {}
+                            },
+                            "author" => {
+                                for child in child.children() {
+                                    match child.tag_name().name() {
+                                        "name" => {
+                                            author =
+                                                child.text().map(str::trim).map(str::to_string);
+                                            break;
                                         }
+                                        "email" => {
+                                            author =
+                                                child.text().map(str::trim).map(str::to_string);
+                                            // Fallback if `name` is not defined later.
+                                        }
+                                        _ => {}
                                     }
                                 }
-                                "updated" => {
-                                    published_at = child.text().map(str::trim).and_then(|text| {
-                                        DateTime::parse_from_rfc3339(text)
-                                            .map(|dt| dt.naive_utc())
-                                            .ok()
-                                    });
-                                }
-                                _ => {}
                             }
+                            "updated" => {
+                                published_at = child.text().map(str::trim).and_then(|text| {
+                                    DateTime::parse_from_rfc3339(text)
+                                        .map(|dt| dt.naive_utc())
+                                        .ok()
+                                });
+                            }
+                            _ => {}
                         }
-                        items.push(Item {
-                            guid: guid.ok_or_else(|| "entry element without identifier")?,
-                            title,
-                            description,
-                            link,
-                            author,
-                            published_at,
-                        });
                     }
-                    _ => {}
+                    items.push(Item {
+                        guid: guid.ok_or_else(|| "entry element without identifier")?,
+                        title,
+                        description,
+                        link,
+                        author,
+                        published_at,
+                    });
                 }
+                _ => {}
             }
         }
 
-- 
cgit v1.1