<?php

/**
 *
 * Name Extractor
 * Copyright (c) 2012 Peter Kahl. All rights reserved.
 * Use of this source code is governed by a GNU General Public License
 * that can be found in the LICENSE file.
 *
 * https://github.com/peterkahl/name-extractor
 *
 */


/**
 * Guesses a human-readable name from an e-mail address or from a
 * "First Last <user@domain.tld>" style string.
 *
 * The guess relies on a dictionary of known names that is loaded from
 * dictionary-names.php (expected to define $dictArray) in the constructor.
 *
 * NOTE: all string handling here is byte-wise (strtolower, ucfirst, strlen),
 * so non-ASCII input is presumably not capitalised/segmented correctly.
 */
class name_extractor {

	/**
	 * Known names, stored as keys (word => index) so that membership can be
	 * tested with a key lookup instead of a linear search.
	 */
	protected $dict_array = array();

	/**
	 * Upper bound used when looking words up in the dictionary.
	 * Presumably the length of the longest dictionary word -- verify against
	 * dictionary-names.php if the dictionary changes.
	 */
	protected $longest_word = 14;

	//------------------------------------------------------------------

	/**
	 * Loads the dictionary file that sits next to this file and flips it so
	 * that the words become array keys.
	 */
	public function __construct() {
		// plain require (not require_once): every instance needs $dictArray
		// to be defined in this local scope
		require dirname(__FILE__).'/dictionary-names.php';
		$this->dict_array = array_flip($dictArray); // speed trick
	}

	//------------------------------------------------------------------

	/**
	 * Extracts a display name from an address string.
	 *
	 * Steps, first match wins:
	 *  1. "Name <address>"   -> the (unquoted) text before '<'
	 *  2. local part shorter than 4 characters -> returned capitalised
	 *  3. local part contains '.', else '_', else '-' -> that separator
	 *     splits the words
	 *  4. local part is a dictionary word -> returned capitalised
	 *  5. otherwise digits are treated as separators and each fragment is
	 *     segmented with breakString()
	 *
	 * @param string $str  e-mail address, optionally with a display name
	 * @return string      guessed name with each word capitalised; the
	 *                     lowercased local part when nothing better is found
	 */
	public function extract_name($str) {

		$str = trim(strtolower($str));

		// check if "First Last<user@domain.tld>"
		$pos_lt = strpos($str, '<');
		if ($pos_lt !== false) {
			// text in front of '<', without whitespace and surrounding quotes
			$name = trim((string) substr($str, 0, $pos_lt), " \t\n\r\0\x0B\"'");
			if ($name !== '') {
				return $this->ucfirst_words($name); // Got it!!!
			}
			// no display name: keep only what follows '<'
			$str = (string) substr($str, $pos_lt + 1);
		}

		// keep the local part (everything before the first '@')
		$pos_at = strpos($str, '@');
		if ($pos_at !== false) {
			$str = (string) substr($str, 0, $pos_at);
		}
		$str = trim($str);

		// name is short?
		if (strlen($str) < 4) return ucfirst($str); // Got it!!!

		// contains a separator? Checked in this order, only the first
		// separator type found is used to split the words.
		foreach (array('.', '_', '-') as $separator) {
			if (strpos($str, $separator) !== false) {
				return $this->ucfirst_words(str_replace($separator, ' ', $str)); // Got it!!!
			}
		}

		// check dictionary
		if (strlen($str) <= $this->longest_word && array_key_exists($str, $this->dict_array)) {
			return ucfirst($str); // Got it!!!
		}

		// must break string: every run of digits acts as a word boundary
		$new = preg_replace('#[0-9]{1,}#', ' ', $str);
		if (strlen($new) <= 3) {
			// too little left after removing the digits; return as is
			return $str;
		}

		$parts = array();
		foreach (explode(' ', $new) as $frag) {
			if ($frag === '') continue; // leading/trailing digits leave empty fragments
			if (strlen($frag) > 2) {
				$frag = $this->ucfirst_words($this->breakString($frag));
			}
			$parts[] = ucfirst($frag);
		}
		return implode(' ', $parts);
	}

	//------------------------------------------------------------------

	/**
	 * Segments a string into dictionary words.
	 *
	 * At every position the longest dictionary word starting there is taken;
	 * when none matches, a single character is taken instead. Consecutive
	 * single characters are afterwards glued together into one fragment.
	 *
	 * @param string $str  lowercase string without separators
	 * @return array       list of fragments in their original order;
	 *                     empty array for an empty string
	 */
	public function breakString($str) {

		$str = (string) $str;
		$str_length = strlen($str);
		if ($str_length === 0) return array();

		$maxlen = min($str_length, $this->longest_word);

		// $n .... position (index) in the string
		$words = array();
		for ($n = 0; $n < $str_length; ) {

			// fall back to a single character when nothing matches
			$match = substr($str, $n, 1);

			// Try candidates from the longest to the shortest (2 characters)
			// and stop at the first hit. Candidates may be up to
			// $maxlen + 1 characters long, limited by the rest of the string.
			$limit = min($maxlen + 1, $str_length - $n);
			for ($len = $limit; $len >= 2; $len--) {
				$candidate = substr($str, $n, $len);
				if (array_key_exists($candidate, $this->dict_array)) {
					$match = $candidate;
					break;
				}
			}

			$words[] = $match;
			$n += strlen($match);
		}

		// glue together single characters
		$new = array();
		$run = ''; // pending run of consecutive single characters
		foreach ($words as $val) {
			if (strlen($val) > 1) {
				if ($run !== '') {
					$new[] = $run;
					$run = '';
				}
				$new[] = $val;
			}
			else {
				$run .= $val;
			}
		}
		if ($run !== '') $new[] = $run;

		return $new; // array
	}

	//------------------------------------------------------------------

	/**
	 * Capitalises the first letter of every word and joins the words with
	 * single spaces. Empty words (e.g. from repeated spaces) are dropped.
	 *
	 * @param array|string $arr  list of words, or a space separated string
	 * @return string
	 */
	public function ucfirst_words($arr) {
		if (!is_array($arr)) $arr = explode(' ', $arr);
		$words = array();
		foreach ($arr as $val) {
			$val = (string) $val;
			if ($val === '') continue;
			$words[] = ucfirst($val);
		}
		return implode(' ', $words);
	}

}
//----------------------------------------------------------------------

?>
