get_dom_document(); if ( is_wp_error( $dom ) ) { return $dom; } if ( ! class_exists( '\Webmention\Mf2\Parser' ) ) { require_once WEBMENTION_PLUGIN_DIR . '/libraries/mf2/Mf2/Parser.php'; } $source_url = $response->get_url(); $parser = new Parser( $dom, $source_url ); $data = $parser->parse(); // Attempts to remove everything but the representative item. $item = $this->get_representative_item( $data, $target_url ); if ( ! $item ) { return false; } $author = $this->get_representative_author( $item, $data ); // add response type $response_type = $this->get_response_type( $item, $data, $target_url ); $this->set_properties( $item ); $this->set_property_author( $author ); $this->webmention_item->add_response_type( wp_slash( $response_type ) ); $this->webmention_item->add_url( $source_url ); // If there is no URL property then use the retrieved URL. return true; } /** * Takes mf2 item and generates a Webmention Item. * * @param array $mf_array JSON Array of Parsed Microformats. * @return WP_Error|true Return error or true if successful. */ public function set_properties( $mf_array ) { // Only store the raw representative item and discard other information. $this->webmention_item->add_raw( $mf_array ); // Retrieve time properties if available. $this->webmention_item->add_published( $this->get_datetime_property( 'published', $mf_array ) ); $this->webmention_item->add_updated( $this->get_datetime_property( 'updated', $mf_array ) ); $this->webmention_item->add_url( $this->get_plaintext( $mf_array, 'url' ) ); // Sometimes the featured image is stored in featured. Otherwise try photo. $this->webmention_item->add_photo( $this->get_plaintext( $mf_array, 'featured' ) ); $this->webmention_item->add_photo( $this->get_plaintext( $mf_array, 'photo' ) ); $content = $this->get_html( $mf_array, 'content' ); $this->webmention_item->add_content( $content ); $summary = $this->get_plaintext( $mf_array, 'summary' ); if ( empty( $summary ) ) { $summary = $this->generate_summary( $content ); } $this->webmention_item->add_summary( $summary ); $this->webmention_item->add_meta( apply_filters( 'webmention_handler_mf2_set_properties', array(), $this ) ); return true; } /** * Takes author property and returns simplified array of selected properties. * * @param array $mf_array * @param array Author array. */ protected function set_property_author( $properties ) { $author = array( 'type' => 'card' ); if ( $this->is_microformat( $properties ) ) { foreach ( array( 'name', 'nickname', 'given-name', 'family-name', 'url', 'email', 'photo' ) as $prop ) { $author[ $prop ] = $this->get_plaintext( $properties, $prop ); } } $this->webmention_item->add_author( array_filter( $author ) ); } /** * Is string a URL. * * @param array $string * * @return bool */ protected function is_url( $str ) { if ( ! is_string( $str ) ) { return false; } // If debugging is on just validate that URL is validly formatted if ( WP_DEBUG ) { return filter_var( $str, FILTER_VALIDATE_URL ) !== false; } // If debugging is off limit based on WordPress parameters return wp_http_validate_url( $str ); } /** * Is this what type? * * @param array $mf_array Parsed Microformats Array * @param string $type Type * * @return bool */ protected function is_type( $mf_array, $type ) { return is_array( $mf_array ) && ! empty( $mf_array['type'] ) && is_array( $mf_array['type'] ) && in_array( $type, $mf_array['type'], true ); } /** * Returns type * * @param array $mf_array Microformats Array. * @return string|null Return value. */ protected function get_type( $mf_array ) { if ( ! $this->is_microformat( $mf_array ) ) { return null; } return str_replace( 'h-', '', $mf_array['type'][0] ); } /** * Verifies if $mf_array is an array without numeric keys, and has a 'properties' key. * * @param $mf_array * * @return bool */ protected function is_microformat( $mf_array ) { return ( is_array( $mf_array ) && ! wp_is_numeric_array( $mf_array ) && ! empty( $mf_array['type'] ) && isset( $mf_array['properties'] ) ); } /** * Verifies if $mf_array has an 'items' key which is also an array, returns true. * * @param $mf_array * * @return bool */ protected function is_microformat_collection( $mf_array ) { return ( is_array( $mf_array ) && isset( $mf_array['items'] ) && is_array( $mf_array['items'] ) ); } /** * Verifies if property named $propname is in array $mf_array. * * @param array $mf_array * @param string $propname * * @return bool */ protected function has_property( array $mf_array, $propname ) { return ! empty( $mf_array['properties'][ $propname ] ) && is_array( $mf_array['properties'][ $propname ] ); } /** * Verifies if property named $propname is in array $mf_array and is a valid URL. * * @param array $mf_array * @param string $propname * * @return bool */ protected function has_url_property( array $mf_array, $propname ) { return ( $this->has_property( $mf_array, $propname ) && ( $this->is_url( $this->get_plaintext( $mf_array, $propname ) ) ) ); } /** * Verifies if rel named $relname is in array $mf_array. * * @param array $mf_array * @param string $relname * * @return bool */ protected function has_rel( array $mf_array, $relname ) { return ! empty( $mf_array['rels'][ $relname ] ) && is_array( $mf_array['rels'][ $relname ] ); } /** * Verifies if $property is an array without numeric keys and has key 'value' and 'html' set. * * @param $property * * @return bool */ protected function is_embedded_html( $property ) { return is_array( $property ) && ! wp_is_numeric_array( $property ) && isset( $property['value'] ) && isset( $property['html'] ); } /** * If $value is a microformat or embedded html, return $value['value']. Else return v. * * @param $value * * @return mixed */ protected function to_plaintext( $value ) { if ( $this->is_microformat( $value ) || $this->is_embedded_html( $value ) ) { return $value['value']; } elseif ( is_array( $value ) && isset( $value['text'] ) ) { return $value['text']; } return $value; } /** * Returns property $propname $fallback. * * @param array $mf Microformats Array. * @param $propname Property to be retrieved. * @param null|string $fallback Fallback if not available. * @return mixed|null Return value. */ protected function get_property( array $mf_array, $propname, $fallback = null ) { if ( ! empty( $mf_array['properties'][ $propname ] ) && is_array( $mf_array['properties'][ $propname ] ) ) { return $mf_array['properties'][ $propname ]; } return $fallback; } /** * Returns plaintext of $propname with optional $fallback. * * @param array $mf_array Microformats Array. * @param $propname Property to be retrieved. * @param null|string $fallback Fallback if not available. * @return mixed|null Return value. */ protected function get_plaintext( array $mf_array, $propname, $fallback = null ) { if ( ! array_key_exists( 'properties', $mf_array ) ) { return $fallback; } if ( ! empty( $mf_array['properties'][ $propname ] ) && is_array( $mf_array['properties'][ $propname ] ) ) { return $this->to_plaintext( current( $mf_array['properties'][ $propname ] ) ); } return $fallback; } /** * Returns ['html'] element of $value, or ['value'] or just $value, in order of availablility. * * @param $value Microformats Content. * @return mixed HTML Element if present. */ protected function to_html( $value ) { if ( $this->is_embedded_html( $value ) ) { return $value['html']; } elseif ( $this->is_microformat( $value ) ) { return webmention_sanitize_html( htmlspecialchars( $value['value'] ) ); } return webmention_sanitize_html( htmlspecialchars( $value ) ); } /** * Gets HTML of $propname or if not, $fallback. * * @param array $mf_array Microformats JSON array. * @param $propname Property Name. * @param null|string $fallback Fallback if property not found. * @return mixed|null Value of proerty. */ protected function get_html( array $mf_array, $propname, $fallback = null ) { if ( ! empty( $mf_array['properties'][ $propname ] ) && is_array( $mf_array['properties'][ $propname ] ) ) { return $this->to_html( current( $mf_array['properties'][ $propname ] ) ); } return $fallback; } /** * Gets the DateTime properties including published or updated, depending on params. * * @param $name string updated or published. * @param array $mf_array Microformats JSON array. * @param null|DateTimeImmutable $fallback What to return if not a DateTime property. * @return mixed|null */ protected function get_datetime_property( $name, array $mf_array, $fallback = null ) { if ( $this->has_property( $mf_array, $name ) ) { $return = $this->get_plaintext( $mf_array, $name ); } else { return $fallback; } try { return new DateTimeImmutable( $return ); } catch ( Exception $e ) { return $fallback; } } /** * get all top-level items. * * @param array $mf_array the microformats array. * @param array an array of top level elements array. * * @return array Return the top level items in an array. */ protected function get_items( $mf_array ) { if ( ! $this->is_microformat_collection( $mf_array ) ) { return array(); } // Get first item. $first_item = current( $mf_array['items'] ); // Check if it is an h-feed. if ( $this->is_type( $first_item, 'h-feed' ) && array_key_exists( 'children', $first_item ) ) { $mf_array['items'] = $first_item['children']; } // Return entries. return $mf_array['items']; } /** * helper to find the correct h-entry node * * @param array $mf_array the parsed microformats array * @param string $url the url * * @return array the h-card node or false */ public function find_representative_hcard( $mf_array, $url ) { if ( ! $this->is_microformat_collection( $mf_array ) ) { return false; } $items = $mf_array['items']; if ( ! is_array( $items ) || empty( $items ) ) { return false; } foreach ( $items as $item ) { if ( $this->is_type( $item, 'h-card' ) && $this->urls_match( $url, $this->get_plaintext( $item, 'url' ) ) ) { return $item; } } return false; } /** * helper to find the correct h-entry node * * @param array $mf_array the parsed microformats array * @param string $target the target url * * @return array the h-entry node or false */ public function find_representative_item( $mf_array, $target ) { $items = $this->get_items( $mf_array ); if ( ! is_array( $items ) || empty( $items ) ) { return false; } foreach ( $items as $item ) { // check properties if ( isset( $item['properties'] ) ) { // check properties if target urls was mentioned foreach ( $item['properties'] as $key => $values ) { // check "normal" links if ( $this->compare_urls( $target, $values ) ) { return $item; } // check included h-* formats and their links foreach ( $values as $obj ) { // check if reply is a "cite" if ( isset( $obj['type'] ) && array_intersect( array( 'h-entry', 'h-cite' ), $obj['type'] ) ) { // check url if ( isset( $obj['properties'] ) ) { if ( isset( $obj['properties']['url'] ) ) { // check target if ( $this->compare_urls( $target, $obj['properties']['url'] ) ) { return $item; } } // check properties if target urls was mentioned foreach ( $obj['properties'] as $obj_key => $obj_values ) { if ( $this->compare_urls( $target, $obj_values ) ) { return $item; } // Make sure this is a numeric array before checking this. if ( wp_is_numeric_array( $obj_values ) ) { $obj_value = current( $obj_values ); // check content for the link if ( 'content' === $obj_key && ! empty( $obj_value['html'] ) && is_string( $obj_value['html'] ) && preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $obj_value['html'], $context ) ) { return $item; } elseif ( 'summary' === $obj_key && is_string( $obj_value ) && preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $obj_value, $context ) ) { return $item; } } } } } } } // check properties if target urls was mentioned foreach ( $item['properties'] as $key => $values ) { if ( wp_is_numeric_array( $values ) ) { $value = current( $values ); // check content for the link if ( 'content' === $key && ! empty( $value['html'] ) && is_string( $value['html'] ) && preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $value['html'], $context ) ) { return $item; } elseif ( 'summary' === $key && is_string( $value ) && preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $value, $context ) ) { return $item; } } } } } // return first h-entry return false; } /** * Takes the mf2 json array passed through and returns a cleaned up representative item. * * @param $mf_array The entire mf array. * @param $url The source URL. * * @return array Return the representative item on the page. */ protected function get_representative_item( $mf_array, $url ) { $item = $this->find_representative_item( $mf_array, $url ); if ( empty( $item ) || ! is_array( $item ) ) { return array(); } return $item; } /** * Helper to find the correct author node. * * @param array $item Item to find an author on. * @param array $mf_array The parsed microformats array. * @param string $source The source url. * @see https://indieweb.org/authorship * * @return array|null the h-card node or null. */ protected function get_representative_author( $item, $mf_array ) { $authorpage = false; if ( $this->has_property( $item, 'author' ) ) { // Check if any of the values of the author property are an h-card. foreach ( $item['properties']['author'] as $author ) { if ( $this->is_type( $author, 'h-card' ) ) { // 5.1 "if it has an h-card, use it, exit." return $author; } elseif ( is_string( $author ) ) { if ( wp_http_validate_url( $author ) ) { // 5.2 "otherwise if author property is an http(s) URL, let the author-page have that URL" $authorpage = $author; } else { // 5.3 "otherwise use the author property as the author name, exit" // We can only set the name, no h-card or URL was found. $name = $this->get_plaintext( $item, 'author' ); } } else { // This case is only hit when the author property is an mf2 object that is not an h-card. $name = $this->get_plaintext( $item, 'author' ); } if ( ! $authorpage ) { return array( 'type' => array( 'h-card' ), 'properties' => array( 'name' => array( $name ), ), ); } } } // Some URLs have a separate h-card. While this is not in the authorship specification assume a top level h-card on a page is the author. $items = $this->get_items( $mf_array ); foreach ( $items as $item ) { if ( $this->is_type( $item, 'h-card' ) ) { return $item; } } // 6. "if no author page was found" ... check for rel-author link. if ( ! $authorpage ) { if ( isset( $mf_array['rels'] ) && isset( $mf_array['rels']['author'] ) ) { $authorpage = $mf_array['rels']['author'][0]; } } // 7. "if there is an author-page URL" . if ( $authorpage ) { if ( ! $this->urls_match( $authorpage, $this->get_plaintext( $mf_array, 'url' ) ) ) { return $this->parse_authorpage( $authorpage ); } } } /** * Compare an url with a list of urls. * * @param string $needle The target url. * @param array $haystack A list of urls. * @param boolean $schemeless Define if the target url should be checked with http:// and https:// . * * @return boolean */ public function compare_urls( $needle, $haystack, $schemeless = true ) { if ( ! $this->is_url( $needle ) ) { return false; } if ( is_array( reset( $haystack ) ) ) { return false; } if ( true === $schemeless ) { // Remove url-scheme. $schemeless_target = preg_replace( '/^https?:\/\//i', '', $needle ); // Add both urls to the needle. $needle = array( 'http://' . $schemeless_target, 'https://' . $schemeless_target ); } else { // Make $needle an array. $needle = array( $needle ); } // Compare both arrays. return array_intersect( $needle, $haystack ); } /** * See if urls match for each component of parsed urls. Return true if so. * * @param $url1 * @param $url2 * @return bool * @see parseUrl() */ protected function urls_match( $url1, $url2 ) { return ( normalize_url( $url1 ) === normalize_url( $url2 ) ); } /** * All supported url types * * @return array */ protected function get_class_mapper() { $class_mapper = array(); /* * These classes represent the source sending a webmention based on its display of a mention sent by the target. * This sets the type to target-update, indicating that the purpose of the webmention is to notify of an update to the original webmention. */ $class_mapper['comment'] = 'target-update'; $class_mapper['like'] = 'target-update'; $class_mapper['favorite'] = 'target-update'; $class_mapper['repost'] = 'target-update'; $class_mapper['bookmark'] = 'target-update'; $class_mapper['read'] = 'target-update'; $class_mapper['listen'] = 'target-update'; $class_mapper['watch'] = 'target-update'; /* * rsvp * @link https://indieweb.org/rsvp */ $class_mapper['rsvp'] = 'rsvp'; /* * invite * @link https://indieweb.org/invitation */ $class_mapper['invitee'] = 'invite'; /* * repost * @link https://indieweb.org/repost */ $class_mapper['repost-of'] = 'repost'; /* * likes * @link https://indieweb.org/likes */ $class_mapper['like-of'] = 'like'; /* * favorite * @link https://indieweb.org/favorite */ $class_mapper['favorite-of'] = 'favorite'; /* * bookmark * @link https://indieweb.org/bookmark */ $class_mapper['bookmark-of'] = 'bookmark'; /* * tag * @link https://indieweb.org/tag */ $class_mapper['tag-of'] = 'tag'; $class_mapper['category'] = 'tag'; /* * read * @link https://indieweb.org/read */ $class_mapper['read-of'] = 'read'; /* * listen * @link https://indieweb.org/listen */ $class_mapper['listen-of'] = 'listen'; /* * watch * @link https://indieweb.org/watch */ $class_mapper['watch-of'] = 'watch'; /* * follow * @link https://indieweb.org/follow */ $class_mapper['follow-of'] = 'follow'; /* * replies * @link https://indieweb.org/replies */ $class_mapper['in-reply-to'] = 'comment'; $class_mapper['reply'] = 'comment'; $class_mapper['reply-of'] = 'comment'; return apply_filters( 'webmention_mf2_class_mapper', $class_mapper ); } /** * All supported url types * * @return array */ protected function get_rel_mapper() { $rel_mapper = array(); /* * replies * @link https://indieweb.org/in-reply-to */ $rel_mapper['in-reply-to'] = 'comment'; $rel_mapper['reply-of'] = 'comment'; /* * bookmarks * @link https://microformats.org/wiki/rel-design-pattern#rel.3D.22bookmark.22 */ $rel_mapper['bookmark'] = 'bookmark'; /* * tags * @link https://microformats.org/wiki/rel-tag */ $rel_mapper['tag'] = 'tag'; return apply_filters( 'webmention_mf2_rel_mapper', $rel_mapper ); } /** * Check entry classes or document rels for post-type * * @param array $entry the represantative entry * @param array $mf_array the document * @param string $target the target url * * @return string the post-type */ protected function get_response_type( $entry, $mf_array, $target ) { $classes = $this->get_class_mapper(); // check properties for target-url foreach ( $entry['properties'] as $key => $values ) { // check u-* params if ( in_array( $key, array_keys( $classes ), true ) ) { // check RSVP if available if ( 'rsvp' === $key ) { return 'rsvp:' . $this->get_first_array_item( $values ); } // check "normal" links if ( $this->compare_urls( $target, $values ) ) { return $classes[ $key ]; } // iterate in-reply-tos foreach ( $values as $obj ) { // check if reply is a "cite" or "entry" if ( in_array( $this->get_type( $obj ), array( 'cite', 'entry' ), true ) ) { // check url if ( $this->has_property( $obj, 'url' ) ) { // check target if ( isset( $obj['properties']['url'] ) && $this->compare_urls( $target, $obj['properties']['url'] ) ) { return $classes[ $key ]; } } if ( isset( $obj['properties'] ) && is_array( $obj['properties'] ) ) { // check properties if target urls was mentioned foreach ( $obj['properties'] as $obj_key => $obj_values ) { if ( $this->compare_urls( $target, $obj_values ) ) { return $classes[ $key ]; } $obj_value = current( $obj_values ); // check content for the link if ( 'content' === $obj_key && ! empty( $obj_value['html'] ) && is_string( $obj_value['html'] ) && preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $obj_value['html'], $context ) ) { return $classes[ $key ]; } elseif ( 'summary' === $obj_key && is_string( $obj_value ) && preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $obj_value, $context ) ) { return $classes[ $key ]; } } } } } } } // check if site has any rels if ( ! isset( $mf_array['rels'] ) ) { return 'mention'; } $rels = $this->get_rel_mapper(); // check rels for target-url foreach ( $mf_array['rels'] as $key => $values ) { // check rel params if ( in_array( $key, array_keys( $rels ), true ) ) { foreach ( $values as $value ) { if ( $value === $target ) { return $rels[ $key ]; } } } } return 'mention'; } /** * Returns the first item in $val if it's a non-empty array, otherwise $val itself. * * @param array|null $val * * @return string */ protected function get_first_array_item( $val ) { if ( $val && is_array( $val ) ) { return $val[0]; } return $val; } /** * Takes a response object and returns an author * * @param string $url Author URL * * @return WP_Error|array Return error or author array if successful. */ public function parse_authorpage( $url ) { $response = Request::get( $url, false ); if ( is_wp_error( $response ) ) { return $response; } $dom = $response->get_dom_document(); if ( is_wp_error( $dom ) ) { return $dom; } if ( ! class_exists( '\Webmention\Mf2\Parser' ) ) { require_once WEBMENTION_PLUGIN_DIR . '/libraries/mf2/Mf2/Parser.php'; } $parser = new Parser( $dom, $url ); $data = $parser->parse(); $item = $this->find_representative_hcard( $data, $url ); $author = array( 'type' => array( 'h-card' ), 'properties' => array( 'url' => array( $url ), ), ); if ( ! empty( $item ) || is_array( $item ) ) { foreach ( array( 'name', 'nickname', 'given-name', 'family-name', 'email', 'photo' ) as $prop ) { if ( array_key_exists( $prop, $item['properties'] ) ) { $author['properties'][ $prop ] = $item['properties'][ $prop ]; } } } return $author; } }