<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 encoding=utf-8: */
// +----------------------------------------------------------------------+
// | Eventum - Issue Tracking System                                      |
// +----------------------------------------------------------------------+
// | Copyright (c) 2003, 2004, 2005, 2006, 2007 MySQL AB                  |
// |                                                                      |
// | This program is free software; you can redistribute it and/or modify |
// | it under the terms of the GNU General Public License as published by |
// | the Free Software Foundation; either version 2 of the License, or    |
// | (at your option) any later version.                                  |
// |                                                                      |
// | This program is distributed in the hope that it will be useful,      |
// | but WITHOUT ANY WARRANTY; without even the implied warranty of       |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        |
// | GNU General Public License for more details.                         |
// |                                                                      |
// | You should have received a copy of the GNU General Public License    |
// | along with this program; if not, write to:                           |
// |                                                                      |
// | Free Software Foundation, Inc.                                       |
// | 59 Temple Place - Suite 330                                          |
// | Boston, MA 02111-1307, USA.                                          |
// +----------------------------------------------------------------------+
// | Authors: João Prado Maia <jpm@mysql.com>                             |
// +----------------------------------------------------------------------+
//

/**
* The MIME:: class provides methods for dealing with MIME standards.
*
* $Horde: horde/lib/MIME.php,v 1.121 2003/11/06 15:26:17 chuck Exp $
*
* Copyright 1999-2003 Chuck Hagenbuch <chuck@horde.org>
*
* See the enclosed file COPYING for license information (LGPL). If you
* did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
*
*/

require_once(APP_PEAR_PATH . "Mail/mimeDecode.php");
require_once(APP_INC_PATH . "class.error_handler.php");

/**
 * Class to handle the business logic related to the MIME email
 * processing. The is8bit(), encode() and _encode() functions come from
 * the excellent Horde package at http://www.horde.org. These functions are
 * licensed under the LGPL, and Horde's copyright notice is available
 * above.
 *
 * @version 1.0
 * @author João Prado Maia <jpm@mysql.com>
 */
class Mime_Helper
{
    /**
     * Method used to get the charset from a raw or already decoded email.
     *
     * For a multipart/mixed message whose first part is a text part, the
     * charset is read from the Content-Type header of that first part;
     * otherwise it is read from the top-level Content-Type header.
     *
     * A missing header yields false instead of being handed to preg_match(),
     * and a header that is present several times (stored as an array) is
     * scanned entry by entry.
     *
     * @access  public
     * @param   mixed   $input The full body of the message or decoded email.
     * @return  mixed   The charset named in the Content-Type header, or false
     *                  when the message could not be decoded or no charset is given.
     */
    function getCharacterSet($input)
    {
        if (!is_object($input)) {
            $structure = Mime_Helper::decode($input, false, false);
        } else {
            $structure = $input;
        }
        if (empty($structure) || !is_object($structure)) {
            return false;
        }

        // decide which set of headers carries the relevant Content-Type
        $use_first_part = isset($structure->ctype_primary) && isset($structure->ctype_secondary)
            && $structure->ctype_primary == 'multipart' && $structure->ctype_secondary == 'mixed'
            && !empty($structure->parts) && is_array($structure->parts)
            && isset($structure->parts[0]->ctype_primary)
            && $structure->parts[0]->ctype_primary == 'text';
        if ($use_first_part) {
            $source = $structure->parts[0];
        } else {
            $source = $structure;
        }

        if (!isset($source->headers['content-type'])) {
            return false;
        }
        $content_type = $source->headers['content-type'];

        // a repeated header shows up as an array of values; check each of them
        $candidates = is_array($content_type) ? $content_type : array($content_type);
        foreach ($candidates as $candidate) {
            if (is_string($candidate)
                    && preg_match('/charset\s*=\s*(["\'])?([-\w\d]+)(\1)?;?/i', $candidate, $matches)) {
                return $matches[2];
            }
        }

        return false;
    }


    /**
     * Builds the displayable body of a decoded MIME message.
     *
     * The structure is flattened with Mime_Helper::parse_output(). Text parts
     * win over HTML parts: when any text part exists they are joined with a
     * blank line and returned as-is. Otherwise the HTML parts are joined,
     * line-break markup is turned into newlines and the remaining tags are
     * stripped. With neither kind of part an empty string is returned.
     *
     * @access  public
     * @param   object $output The parsed message structure
     * @return  string The message body
     * @see     Mime_Helper::decode()
     */
    function getMessageBody(&$output)
    {
        $parts = array();
        Mime_Helper::parse_output($output, $parts);
        if (empty($parts)) {
            // we only log here and carry on as if nothing happened until it's clear this is the right check to do.
            Error_Handler::logError(array("Mime_Helper::parse_output failed. Corrupted MIME in email?", $output), __FILE__, __LINE__);
        }

        if (isset($parts["text"])) {
            return join("\n\n", $parts["text"]);
        }
        if (!isset($parts["html"])) {
            return '';
        }

        $html = join("\n\n", $parts["html"]);
        // hack for inotes to prevent content from being displayed all on one line.
        $html = str_replace("</DIV><DIV>", "\n", $html);
        $html = str_replace(array("<br>", "<br />", "<BR>", "<BR />"), "\n", $html);
        // XXX: do we also need to do something here about base64 encoding?
        return strip_tags($html);
    }


    /**
     * Method used to decode the RFC 2047 encoded-words ("=?charset?q|b?text?=")
     * found in a MIME header string.
     *
     * White space between two adjacent encoded-words is dropped, then every
     * encoded-word is replaced by its decoded bytes. No charset conversion is
     * done: the decoded bytes are inserted as they are.
     *
     * @access  public
     * @param   string $input The string to be fixed
     * @return  string The fixed string
     */
    function fixEncoding($input)
    {
        // Remove white space between encoded-words. The lookahead keeps the
        // following encoded-word out of the match, so that it can itself be
        // the left-hand side of the next match (a run of three or more words).
        $input = preg_replace('/(=\?[^?]+\?(q|b)\?[^?]*\?=)\s+(?==\?)/i', '\1', $input);
        // For each encoded-word...
        while (preg_match('/(=\?([^?]+)\?(q|b)\?([^?]*)\?=)/i', $input, $matches)) {
            $encoded  = $matches[1];
            $encoding = $matches[3];
            $text     = $matches[4];
            if (strtolower($encoding) == 'b') {
                $text = base64_decode($text);
            } else {
                // "_" stands for a space in Q-encoding; the =XX escapes are
                // decoded in a single pass so that an escape which decodes to
                // "=" cannot combine with the characters that follow it
                $text = quoted_printable_decode(str_replace('_', ' ', $text));
            }
            $input = str_replace($encoded, $text, $input);
        }
        return $input;
    }


    /**
     * Method used to properly quote the sender of a given email address.
     *
     * An address of the form "Name <user@host>" gets its name portion wrapped
     * in double quotes (quotes inside the name are backslash-escaped). An
     * address without "<" is returned untouched, and an address that only
     * consists of "<user@host>" is returned without an empty quoted name.
     *
     * @access  public
     * @param   string $address The full email address
     * @return  string The properly quoted email address
     */
    function quoteSender($address)
    {
        if (strstr($address, '<')) {
            $address = stripslashes($address);
            $bracket_pos = strrpos($address, '<');
            // take everything before the "<" and trim it, instead of assuming
            // exactly one separator character precedes the bracket
            $name = rtrim(substr($address, 0, $bracket_pos));
            $email = substr($address, $bracket_pos);
            if ($name === '') {
                return $email;
            }
            $address = '"' . str_replace('"', '\"', $name) . '" ' . $email;
        }
        return $address;
    }


    /**
     * Method used to remove any unnecessary quoting from an email address.
     *
     * For "Name <user@host>" style addresses only the name portion is
     * unquoted and the "<user@host>" portion is appended again afterwards.
     * An address that only consists of "<user@host>" is returned as such.
     *
     * @access  public
     * @param   string $address The full email address
     * @return  string The email address without quotes
     */
    function removeQuotes($address)
    {
        $email = '';
        if (strstr($address, '<')) {
            $address = stripslashes($address);
            $bracket_pos = strrpos($address, '<');
            $email = substr($address, $bracket_pos);
            // trim instead of blindly dropping one character before the "<",
            // which used to eat the closing quote of '"Name"<user@host>'
            $address = rtrim(substr($address, 0, $bracket_pos));
        }
        if (preg_match('/^".*"/', $address)) {
            $address = preg_replace('/^"(.*)"/', '\\1', $address);
        }
        if ($email !== '') {
            if ($address === '') {
                $address = $email;
            } else {
                $address .= ' ' . $email;
            }
        }
        return $address;
    }


    /**
     * Method used to properly encode an email address.
     *
     * Addresses without 8-bit characters are simply quoted through
     * Mime_Helper::quoteSender(). Addresses with 8-bit characters get their
     * name portion Q-encoded as "=?APP_CHARSET?Q?...?=" followed by the
     * "<user@host>" portion. The encoding is done byte by byte rather than
     * with the "e" regex modifier, which is no longer supported by PHP 7+.
     *
     * @access  public
     * @param   string $address The full email address
     * @return  string The properly encoded email address
     */
    function encodeAddress($address)
    {
        $address = Mime_Helper::removeQuotes($address);
        if (!Mime_Helper::is8bit($address)) {
            return Mime_Helper::quoteSender($address);
        }

        // split into name and address section; without a <...> section there
        // is nothing that can be encoded separately, so leave the input alone
        if (!preg_match("/(.*)<(.*)>/", $address, $matches)) {
            return $address;
        }

        $name = trim($matches[1]);
        $encoded = '';
        $length = strlen($name);
        for ($i = 0; $i < $length; $i++) {
            $char = $name[$i];
            $code = ord($char);
            if ($char == ' ') {
                // spaces are represented by underscores inside an encoded-word
                $encoded .= '_';
            } elseif (($code >= 0x80) || (($code >= 0x21) && ($code <= 0x2F))
                    || (strpos('[]=?_', $char) !== false)) {
                // 8-bit bytes, punctuation and the characters that have a
                // special meaning inside an encoded-word ("=", "?", "_")
                $encoded .= sprintf('=%02X', $code);
            } else {
                $encoded .= $char;
            }
        }

        return "=?" . APP_CHARSET . "?Q?" . $encoded . "?= <" . $matches[2] . ">";
    }


    /**
     * Decodes a quoted printable encoded address and returns the string.
     *
     * Addresses of the form "=?charset?Q?name?= <user@host>" get their name
     * decoded; anything else is passed through Mime_Helper::removeQuotes().
     *
     * @param   string $address The address to decode
     * @return  string The decoded address
     */
    function decodeAddress($address)
    {
        if (preg_match("/=\?.+\?Q\?(.+)\?= <(.+)>/i", $address, $matches)) {
            // underscores must be turned into spaces BEFORE the =XX escapes
            // are decoded, otherwise a literal underscore sent as "=5F"
            // would end up as a space as well
            $name = quoted_printable_decode(str_replace("_", ' ', $matches[1]));
            return $name . " <" . $matches[2] . ">";
        } else {
            return Mime_Helper::removeQuotes($address);
        }
    }


    /**
     * Tells whether the given string looks like an address whose name part
     * is a Q-encoded word, i.e. "=?charset?Q?text?= <user@host>".
     *
     * @param   string $address The address
     * @return  boolean If the address is quoted printable encoded.
     */
    function isQuotedPrintable($address)
    {
        return (bool) preg_match("/=\?.+\?Q\?.+\?= <.+>/i", $address);
    }


    /**
     * Checks whether a value is a string holding at least one byte in the
     * 0x80-0xFF range.
     *
     * @access public
     *
     * @param string $string  The string to check.
     *
     * @return boolean  True if it does, false if it doesn't (or if the
     *                  given value is not a string at all).
     */
    function is8bit($string)
    {
        if (!is_string($string)) {
            return false;
        }
        return preg_match('/[\x80-\xff]+/', $string) ? true : false;
    }


    /**
     * RFC 2047 encodes the words of a string that contain 8-bit characters.
     *
     * The text is cut into "word + trailing white space" tokens. Tokens
     * without 8-bit characters are copied verbatim. A token with 8-bit
     * characters is encoded through _encode(); when the token that follows
     * it needs encoding too, the white space between them is encoded along
     * with the word and a single space is emitted as separator.
     *
     * @access public
     *
     * @param string $text     The text to encode.
     * @param string $charset  (optional) The character set of the text.
     *
     * @return string  The text, encoded only if it contains non-ASCII
     *                 characters.
     */
    function encode($text, $charset = APP_CHARSET)
    {
        /* Nothing to do for pure 7-bit input. */
        if (!MIME_Helper::is8bit($text)) {
            return $text;
        }

        $charset = strtolower($charset);

        /* Tokenize into [word, trailing white space] pairs. */
        $total = preg_match_all("/([^\s]+)([\s]*)/", $text, $tokens, PREG_SET_ORDER);

        $encoded = '';
        for ($idx = 0; $idx < $total; $idx++) {
            $word = $tokens[$idx][1];
            $gap  = $tokens[$idx][2];

            if (!MIME_Helper::is8bit($word)) {
                $encoded .= $word . $gap;
                continue;
            }

            $next = $idx + 1;
            if (($next < $total) && MIME_Helper::is8bit($tokens[$next][1])) {
                $encoded .= MIME_Helper::_encode($word . $gap, $charset) . ' ';
            } else {
                $encoded .= MIME_Helper::_encode($word, $charset) . $gap;
            }
        }

        return rtrim($encoded);
    }

    /**
     * Internal recursive helper that turns a string into one or more
     * base64 ("b") RFC 2047 encoded-words.
     *
     * The encoded length is estimated as twice the input length. When that
     * estimate plus the charset name and the 7 delimiter characters exceeds
     * the 75 character limit of RFC 2047, the text is cut in two and both
     * halves are encoded separately, joined by a single space.
     *
     * @access private
     *
     * @param string $text     The text to encode.
     * @param string $charset  The character set of the text.
     *
     * @return string  The encoded-word(s) for the given text.
     */
    function _encode($text, $charset)
    {
        $overhead  = strlen($charset) + 7;
        $estimated = strlen($text) * 2;

        /* Short enough: emit a single encoded-word. */
        if (($estimated + $overhead) <= 75) {
            return '=?' . $charset . '?b?' . trim(base64_encode($text)) . '?=';
        }

        /* Too long for one encoded-word: split and encode both pieces. */
        $split_at = intval((68 - strlen($charset)) / 2);
        $head = substr($text, 0, $split_at);
        $tail = substr($text, $split_at);
        return MIME_Helper::_encode($head, $charset) . ' ' . MIME_Helper::_encode($tail, $charset);
    }


    /**
     * Method used to encode a given string in the quoted-printable standard.
     *
     * Every word that contains at least one 8-bit character is replaced by a
     * "=?charset?Q?...?=" encoded-word; the rest of the string is left alone.
     * The string is processed in a single pass, so a word is only ever
     * encoded once even when another 8-bit word is contained in it. The
     * encoding does not rely on the "e" regex modifier, which PHP 7+ no
     * longer supports.
     *
     * @access  public
     * @param   string $hdr_value The string to be encoded
     * @param   string $charset The charset of the string
     * @return  string The encoded string
     */
    function encodeValue($hdr_value, $charset = 'iso-8859-1')
    {
        // odd entries are the words that need encoding, even entries are the
        // text in between them
        $pieces = preg_split('/(\w*[\x80-\xFF]+\w*)/', $hdr_value, -1, PREG_SPLIT_DELIM_CAPTURE);
        if (!is_array($pieces)) {
            return $hdr_value;
        }

        $result = '';
        foreach ($pieces as $index => $piece) {
            if (($index % 2) == 0) {
                $result .= $piece;
                continue;
            }
            $encoded = '';
            $length = strlen($piece);
            for ($i = 0; $i < $length; $i++) {
                $code = ord($piece[$i]);
                // "_" means "space" inside a Q encoded-word, so a literal
                // underscore has to be escaped as well
                if (($code >= 0x80) || ($piece[$i] == '_')) {
                    $encoded .= sprintf('=%02X', $code);
                } else {
                    $encoded .= $piece[$i];
                }
            }
            $result .= '=?' . $charset . '?Q?' . $encoded . '?=';
        }
        return $result;
    }


    /**
     * Cuts a message into its header and body sections at the first blank
     * line (LF or CRLF line endings).
     *
     * @access  public
     * @param   string $input Input to split apart
     * @return  mixed  array(header section, body section), or null when the
     *                 input holds no blank line
     */
    function splitBodyHeader($input)
    {
        $found = preg_match("/^(.*?)\r?\n\r?\n(.*)/s", $input, $match);
        if (!$found) {
            return null;
        }
        $header = $match[1];
        $body   = $match[2];
        return array($header, $body);
    }


    /**
     * Parse headers given in $input and return the header names as an
     * associative array: the key is the lowercased header name and the value
     * is the name as it was spelled in the input.
     *
     * Folded header lines are unfolded first. Lines that do not hold a
     * "name:" prefix are skipped instead of producing an entry with an
     * empty name.
     *
     * @access  public
     * @param   string $input Headers to parse
     * @return  array Contains parsed headers
     */
    function getHeaderNames($input)
    {
        $return = array();
        if (!is_string($input) || ($input === '')) {
            return $return;
        }

        // Unfold the input
        $input   = preg_replace("/\r?\n/", "\r\n", $input);
        $input   = preg_replace("/\r\n(\t| )+/", ' ', $input);
        $headers = explode("\r\n", trim($input));
        foreach ($headers as $value) {
            $pos = strpos($value, ':');
            // no colon (false) or nothing before it (0): not a usable header
            if (($pos === false) || ($pos === 0)) {
                continue;
            }
            $hdr_name = substr($value, 0, $pos);
            $return[strtolower($hdr_name)] = $hdr_name;
        }
        return $return;
    }


    /**
     * Method used to get an unique attachment name for a given
     * filename. This is specially useful for the emails that Microsoft
     * Outlook sends out with several attachments with the same name
     * when you embed several inline screenshots in the message.
     *
     * While the name is already present in the list, a "-N" counter is put
     * in front of the extension: an existing numeric "-N" suffix is
     * incremented, otherwise "-1" is appended. A non-numeric suffix (as in
     * "my-file.txt") is treated as part of the name, which used to send this
     * method into endless recursion.
     *
     * @access  public
     * @param   array $list The list of attachment names already in use
     * @param   string $filename The filename to search for
     * @return  string The unique attachment name
     */
    function getAttachmentName(&$list, $filename)
    {
        $taken = is_array($list) ? array_values($list) : array();

        while (in_array($filename, $taken, true)) {
            // separate the extension (if there is one) from the base name
            $dot_pos = strrpos($filename, '.');
            if ($dot_pos === false) {
                $base = $filename;
                $extension = '';
            } else {
                $base = substr($filename, 0, $dot_pos);
                $extension = substr($filename, $dot_pos);
            }

            // check if this is already named Outlook-2.bmp (or similar)
            $counter = 1;
            $dash_pos = strrpos($base, '-');
            if ($dash_pos !== false) {
                $suffix = substr($base, $dash_pos + 1);
                if (preg_match("/^[0-9]+$/", $suffix)) {
                    // if so, gotta get the number and increment it
                    $counter = intval($suffix) + 1;
                    $base = substr($base, 0, $dash_pos);
                }
            }

            $filename = $base . '-' . $counter . $extension;
        }

        return $filename;
    }


    /**
     * Tells whether an email message carries at least one attachment, as
     * reported by Mime_Helper::_getAttachmentDetails().
     *
     * @access  public
     * @param   mixed   $message The full body of the message or parsed message structure.
     * @return  boolean
     */
    function hasAttachments($message)
    {
        // raw messages need to be decoded (bodies included) first
        if (!is_object($message)) {
            $message = Mime_Helper::decode($message, true);
        }
        return count(Mime_Helper::_getAttachmentDetails($message, TRUE)) > 0;
    }


    /**
     * Returns the details (filename, cid, filetype and body) of every
     * attachment found in a message.
     *
     * @access  public
     * @param   mixed   $message The full body of the message or parsed message structure.
     * @return  array The list of attachments, if any
     */
    function getAttachments($message)
    {
        // a raw message is decoded (with bodies) before it gets inspected
        $message = is_object($message) ? $message : Mime_Helper::decode($message, true);
        $attachments = Mime_Helper::_getAttachmentDetails($message, TRUE);
        return $attachments;
    }


    /**
     * Returns the attachment details of a message, each entry of which
     * carries the content-id of the attachment under its 'cid' key.
     *
     * Note that the returned entries are the full detail records produced by
     * Mime_Helper::_getAttachmentDetails() (including the attachment body),
     * not a bare list of content-ids.
     *
     * @access  public
     * @param   mixed   $message The full body of the message or parsed message structure.
     * @return  array The list of attachment details (with CIDs), if any
     */
    function getAttachmentCIDs($message)
    {
        if (is_object($message)) {
            $structure = $message;
        } else {
            $structure = Mime_Helper::decode($message, true);
        }
        return Mime_Helper::_getAttachmentDetails($structure, true);
    }


    /**
     * Walks a decoded MIME part (and, recursively, its sub-parts) and
     * collects the details of everything that counts as an attachment.
     *
     * A part counts as an attachment when it is an image, or when its
     * content type is not in the invalid list, its disposition is in the
     * valid list and it has a filename. Each record holds the keys
     * 'filename', 'cid' and 'filetype', plus 'blob' (a reference to the body
     * of the part) when $return_body is set.
     *
     * When a part qualifies but does not match the requested filename / CID,
     * an empty array is returned for that whole part, sub-parts included.
     *
     * @access  private
     * @param   object  $mime_part The decoded MIME part
     * @param   boolean $return_body Whether to include the body of each attachment
     * @param   mixed   $return_filename Only return the attachment with this filename, if given
     * @param   mixed   $return_cid Only return the (image) attachment with this content-id, if given
     * @return  array The list of attachment detail records
     */
    function _getAttachmentDetails(&$mime_part, $return_body = FALSE, $return_filename = FALSE, $return_cid = FALSE)
    {
        // sub-parts first; the result of each one is put in front of what was collected so far
        $attachments = array();
        if (isset($mime_part->parts)) {
            for ($idx = 0; $idx < count($mime_part->parts); $idx++) {
                $nested = Mime_Helper::_getAttachmentDetails($mime_part->parts[$idx], $return_body, $return_filename, $return_cid);
                $attachments = array_merge($nested, $attachments);
            }
        }

        // FIXME: content-type is always lowered by PEAR class (CHECKME) and why not $mime_part->content_type?
        $content_type = strtolower(@$mime_part->ctype_primary . '/' . @$mime_part->ctype_secondary);
        if ($content_type == '/') {
            $content_type = '';
        }

        // get the proper filename: content-type "name" first, disposition "filename" second
        $mime_part_filename = @$mime_part->ctype_parameters['name'];
        if (empty($mime_part_filename)) {
            $mime_part_filename = @$mime_part->d_parameters['filename'];
        }

        // true when a particular filename was requested and this part is not it
        $wrong_filename = ($return_filename != FALSE) && ($mime_part_filename != $return_filename);

        if (@$mime_part->ctype_primary == 'image') {
            // hack in order to treat inline images as normal attachments
            // (since Eventum does not display those embedded within the message)
            if ($wrong_filename) {
                return array();
            }
            // if requested, return only the details of a particular
            // attachment CID. Only really needed as hack for inline images
            if (($return_cid != FALSE) && (@$mime_part->headers['content-id'] != $return_cid)) {
                return array();
            }
        } else {
            $qualifies = (!in_array($content_type, Mime_Helper::_getInvalidContentTypes())) &&
                    (in_array(@strtolower($mime_part->disposition), Mime_Helper::_getValidDispositions())) &&
                    (!empty($mime_part_filename));
            if (!$qualifies) {
                // this part is no attachment; only hand back what the sub-parts gave us
                return $attachments;
            }
            if ($wrong_filename) {
                return array();
            }
        }

        $details = array(
            'filename' => $mime_part_filename,
            'cid'      => @$mime_part->headers['content-id'],
            'filetype' => $content_type
        );
        // only include the body of the attachment when
        // requested to save some memory
        if ($return_body == TRUE) {
            $details['blob'] = &$mime_part->body;
        }
        $attachments[] = $details;

        return $attachments;
    }


    /**
     * Looks up a single attachment of a message by filename (and,
     * optionally, content-id) and returns its type and content.
     *
     * @access  public
     * @param   mixed   $message The full content of the message or parsed message structure.
     * @param   string $filename The filename to look for
     * @param   string $cid The content-id to look for, if any
     * @return  array  array(filetype, body) when exactly one attachment
     *                 matches, an empty array otherwise
     */
    function getAttachment($message, $filename, $cid = FALSE)
    {
        if (!is_object($message)) {
            $message = Mime_Helper::decode($message, true);
        }
        $found = Mime_Helper::_getAttachmentDetails($message, TRUE, $filename, $cid);

        // anything but exactly one hit is treated as "not found"
        if (count($found) != 1) {
            return array();
        }
        $attachment = $found[0];
        return array(
            $attachment['filetype'],
            $attachment['blob']
        );
    }


    /**
     * Method used to decode the content of a MIME encoded message through
     * the PEAR Mail_mimeDecode class. Headers are always decoded.
     *
     * @access  public
     * @param   string $message The full body of the message
     * @param   boolean $include_bodies Whether to include the bodies in the return value or not
     * @param   boolean $decode_bodies Whether the bodies should be decoded or not
     * @return  mixed The decoded content of the message
     */
    function decode($message, $include_bodies = FALSE, $decode_bodies = TRUE)
    {
        // need to fix a pretty annoying bug where if the 'boundary' part of a
        // content-type header is split into another line, the PEAR library would
        // not work correctly. this fix will make the boundary part go to the
        // same line as the content-type one
        if (preg_match('/^boundary=/m', $message)) {
            // the second group has to take the boundary value along up to the
            // end of its line; anchoring the end of line right behind
            // "boundary=" would never match an actual boundary declaration
            $pattern = "#(Content-Type: multipart/.+); ?\r?\n(boundary=.*)$#im";
            $replacement = '$1; $2';
            $message = preg_replace($pattern, $replacement, $message);
        }

        $params = array(
            'crlf'           => "\r\n",
            'include_bodies' => $include_bodies,
            'decode_headers' => TRUE,
            'decode_bodies'  => $decode_bodies
        );
        $decode = new Mail_mimeDecode($message);
        return $decode->decode($params);
    }


    /**
     * Flattens the decoded object structure of a MIME message into buckets
     * of bodies, keyed by 'text', 'html' and 'attachments'.
     *
     * Parts that have sub-parts are only descended into. For a leaf part the
     * bucket is picked from its content type and disposition:
     *  - text/plain: 'attachments' when it is an attachment or has a
     *    filename, 'text' otherwise;
     *  - text/html and text/enriched (Apple Mail): 'attachments' when it is
     *    an attachment, 'html' otherwise;
     *  - anything else: 'attachments' for inline parts (forwarded
     *    message/rfc822 excepted), images and attachments, 'text' otherwise.
     *
     * @access  public
     * @param   object $obj The decoded object structure of the MIME message
     * @param   array $parts The parsed parts of the MIME message
     * @return  void
     */
    function parse_output($obj, &$parts)
    {
        if (!empty($obj->parts)) {
            for ($idx = 0; $idx < count($obj->parts); $idx++) {
                Mime_Helper::parse_output($obj->parts[$idx], $parts);
            }
            return;
        }

        $ctype = @strtolower($obj->ctype_primary.'/'.$obj->ctype_secondary);
        $has_disposition = !empty($obj->disposition);
        $is_attached = $has_disposition && (strtolower($obj->disposition) == 'attachment');

        switch ($ctype) {
            case 'text/plain':
                $bucket = ($is_attached || (!empty($obj->d_parameters['filename']))) ? 'attachments' : 'text';
                break;
            case 'text/html':
            // special case for Apple Mail
            case 'text/enriched':
                $bucket = $is_attached ? 'attachments' : 'html';
                break;
            default:
                if ($has_disposition && (strtolower($obj->disposition) == 'inline') &&
                        ($ctype != 'message/rfc822')) {
                    // inline parts, but avoid treating forwarded messages as attachments
                    $bucket = 'attachments';
                } elseif (stristr($ctype, 'image')) {
                    // handle inline images
                    $bucket = 'attachments';
                } elseif (strtolower(@$obj->disposition) == 'attachment') {
                    $bucket = 'attachments';
                } else {
                    $bucket = 'text';
                }
        }

        @$parts[$bucket][] = $obj->body;
    }


    /**
     * Given a quoted-printable string, this
     * function will decode and return it.
     *
     * Soft line breaks ("=" at the end of a line) are removed and "=XX"
     * escapes are turned into the byte they stand for. The work is done by
     * PHP's own quoted_printable_decode() rather than a regular expression
     * with the "e" modifier, which PHP 7+ no longer supports.
     *
     * @access private
     * @param  string Input body to decode
     * @return string Decoded body
     */
    function _quotedPrintableDecode($input)
    {
        return quoted_printable_decode($input);
    }


    /**
     * Lists the content types that are never treated as attachments.
     *
     * @access private
     * @return array The list of content types
     */
    function _getInvalidContentTypes()
    {
        $unsupported = array();
        $unsupported[] = 'message/rfc822';
        $unsupported[] = 'application/pgp-signature';
        $unsupported[] = 'application/ms-tnef';
        return $unsupported;
    }


    /**
     * Lists the content dispositions that are accepted for a MIME part to
     * be treated as an attachment.
     *
     * @access private
     * @return array The list of valid dispositions
     */
    function _getValidDispositions()
    {
        $accepted = array('attachment', 'inline');
        return $accepted;
    }


    /**
     * Cuts a full email into its headers and its body at the first blank
     * line, optionally unfolding the headers through Mail_API::unfold().
     *
     * @access  public
     * @param   string $message The full email message
     * @param   boolean $unfold If headers should be unfolded
     * @return  array array(headers, body), or an empty array when the
     *                message holds no blank line
     */
    function splitHeaderBody($message, $unfold = true)
    {
        if (!preg_match("/^(.*?)\r?\n\r?\n(.*)/s", $message, $match)) {
            return array();
        }

        $headers = $match[1];
        if ($unfold) {
            $headers = Mail_API::unfold($headers);
        }
        return array($headers, $match[2]);
    }
}

// benchmarking the included file (aka setup time): when the APP_BENCHMARK
// constant is enabled, a marker noting that this class file was included is
// set on the object stored in the global 'bench' variable
if (APP_BENCHMARK) {
    $GLOBALS['bench']->setMarker('Included Mime_Helper Class');
}
