mozilla · LordEidi · Jul 21, 2025 · Jul 21, 2025 · Jul 24, 2025 · Jul 25, 2025
diff --git a/README.md b/README.md
@@ -53,6 +53,9 @@ Returns an object containing the following properties:
 * `siteName`: name of the site;
 * `lang`: content language;
 * `publishedTime`: published time;
+* `modifiedTime`: modified time;
+* `favicon`: site favicon as URI, SVG format if available;
+* `image`: URI to article image, extracted from metadata;
 
 The `parse()` method works by modifying the DOM. This removes some elements in the web page, which may be undesirable. You can avoid this by passing the clone of the `document` object to the `Readability` constructor:
 

diff --git a/Readability.js b/Readability.js
@@ -1748,6 +1748,9 @@ Readability.prototype = {
           if (typeof parsed.datePublished === "string") {
             metadata.datePublished = parsed.datePublished.trim();
           }
+          if (typeof parsed.dateModified === "string") {
+            metadata.dateModified = parsed.dateModified.trim();
+          }
         } catch (err) {
           this.log(err.message);
         }
@@ -1771,11 +1774,11 @@ Readability.prototype = {
 
     // property is a space-separated list of values
     var propertyPattern =
-      /\s*(article|dc|dcterm|og|twitter)\s*:\s*(author|creator|description|published_time|title|site_name)\s*/gi;
+      /\s*(article|dc|dcterms|og|twitter)\s*:\s*(author|creator|description|image:alt|image|published_time|modified|title|site_name)\s*/gi;
 
     // name is a single value
     var namePattern =
-      /^\s*(?:(dc|dcterm|og|twitter|parsely|weibo:(article|webpage))\s*[-\.:]\s*)?(author|creator|pub-date|description|title|site_name)\s*$/i;
+      /^\s*(?:(dc|dcterms|og|twitter|parsely|weibo:(article|webpage))\s*[-\.:]\s*)?(author|creator|pub-date|description|title|site_name)\s*$/i;
 
     // Find description tags.
     this._forEachNode(metaElements, function (element) {
@@ -1813,7 +1816,7 @@ Readability.prototype = {
     metadata.title =
       jsonld.title ||
       values["dc:title"] ||
-      values["dcterm:title"] ||
+      values["dcterms:title"] ||
       values["og:title"] ||
       values["weibo:article:title"] ||
       values["weibo:webpage:title"] ||
@@ -1835,7 +1838,7 @@ Readability.prototype = {
     metadata.byline =
       jsonld.byline ||
       values["dc:creator"] ||
-      values["dcterm:creator"] ||
+      values["dcterms:creator"] ||
       values.author ||
       values["parsely-author"] ||
       articleAuthor;
@@ -1844,7 +1847,7 @@ Readability.prototype = {
     metadata.excerpt =
       jsonld.excerpt ||
       values["dc:description"] ||
-      values["dcterm:description"] ||
+      values["dcterms:description"] ||
       values["og:description"] ||
       values["weibo:article:description"] ||
       values["weibo:webpage:description"] ||
@@ -1854,24 +1857,95 @@ Readability.prototype = {
     // get site name
     metadata.siteName = jsonld.siteName || values["og:site_name"];
 
+    // get image thumbnail
+    metadata.image = values["og:image"] || values.image || values["twitter:image"];
+
+    // get favicon
+    metadata.favicon = this._getArticleFavicon()
+
     // get article published time
     metadata.publishedTime =
       jsonld.datePublished ||
       values["article:published_time"] ||
       values["parsely-pub-date"] ||
       null;
 
+    // get modified date
+    metadata.modifiedTime =
+        jsonld.dateModified ||
+        values["article:modified_time"] ||
+        values["dcterms:modified"] ||
+        null;
+
     // in many sites the meta value is escaped with HTML entities,
     // so here we need to unescape it
     metadata.title = this._unescapeHtmlEntities(metadata.title);
     metadata.byline = this._unescapeHtmlEntities(metadata.byline);
     metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt);
     metadata.siteName = this._unescapeHtmlEntities(metadata.siteName);
     metadata.publishedTime = this._unescapeHtmlEntities(metadata.publishedTime);
+    metadata.modifiedTime = this._unescapeHtmlEntities(metadata.modifiedTime);
 
     return metadata;
   },
 
+  /**
+   * Trying to extract the favicon from the page
+   **/
+  _getArticleFavicon() {
+
+    // string to return
+    var favicon = "";
+
+    // find all ink tags
+    var metaElements = this._doc.getElementsByTagName("link");
+
+    // iterate over tags.
+    this._forEachNode(metaElements, function (element) {
+
+      // make sure the type is correct and element contains a href attribute
+      var rel = element.hasAttribute("rel") ? element.getAttribute("rel") : "";
+      if (rel === "icon" && element.hasAttribute("href")) {
+        favicon = element.getAttribute("href");
+
+        var type = element.hasAttribute("type") ? element.getAttribute("type") : "";
+        if(type === "image/svg+xml")
+        {
+          // svg wins as best quality format
+          return this._toAbsoluteURI(favicon);
 Array.prototype.forEach.call(nodeList, fn, this); 
 Array.prototype.forEach.call(nodeList, fn, this); 
+        }
+
+        // what is missing here is an algorithm which compares all href and selects the "best" size
+      }
+    });
+
+    // make sure to return an absolute URI
+    return this._toAbsoluteURI(favicon);
+  },
+
+  /**
+   * Convert a relative to an absolute URI
+   *
+   * @param {string} uri
+   **/
+  _toAbsoluteURI(uri) {
+
+    // stop processing if uri is empty
+    if(uri === ""){
+      return uri;
+    }
+
+    // try to parse into URL object
+    var absolute_uri = URL.parse(uri, this._doc.baseURI);
+    if(!absolute_uri){
+      // parsing failed, return original URI
+      return uri;
+    }
+
+    // parsing worked, return absolute URI
+    return absolute_uri.href;
+  },
+
   /**
    * Check if node is image, or if node contains exactly only one image
    * whether as a direct child or as its descendants.
@@ -2784,7 +2858,10 @@ Readability.prototype = {
       length: textContent.length,
       excerpt: metadata.excerpt,
       siteName: metadata.siteName || this._articleSiteName,
+      image: metadata.image,
+      favicon: metadata.favicon,
       publishedTime: metadata.publishedTime,
+      modifiedTime: metadata.modifiedTime,
     };
   },
 };