[Slim-Checkins] r10564 - /trunk/server/Slim/Utils/Unicode.pm

dsully at svn.slimdevices.com dsully at svn.slimdevices.com
Thu Nov 2 15:38:43 PST 2006


Author: dsully
Date: Thu Nov  2 15:38:43 2006
New Revision: 10564

URL: http://svn.slimdevices.com?rev=10564&view=rev
Log:
Bug: N/A
Description: Rework some of the Unicode guessing. Insert Encode::Detect after UTF but before ISO-8859-1. Always load Encode::Guess.

Modified:
    trunk/server/Slim/Utils/Unicode.pm

Modified: trunk/server/Slim/Utils/Unicode.pm
URL: http://svn.slimdevices.com/trunk/server/Slim/Utils/Unicode.pm?rev=10564&r1=10563&r2=10564&view=diff
==============================================================================
--- trunk/server/Slim/Utils/Unicode.pm (original)
+++ trunk/server/Slim/Utils/Unicode.pm Thu Nov  2 15:38:43 2006
@@ -47,6 +47,7 @@
 {
 	# We implement a decode() & encode(), so don't import those.
 	require Encode;
+	require Encode::Guess;
 
 	$FB_QUIET = Encode::FB_QUIET();
 
@@ -155,20 +156,16 @@
 	if (!$@) {
 
 		$encodeDetect = 1;
-
-	} else {
-
-		require Encode::Guess;
-
-		$encodeDetect = 0;
-		$Encode::Guess::NoUTFAutoGuess = 1;
-
-		# Setup suspects for Encode::Guess based on the locale - we might also
-		# want to use our own Language pref?
-		if ($lc_ctype ne 'utf8') {
-
-			Encode::Guess->add_suspects($lc_ctype);
-		}
+	}
+
+	# Setup Encode::Guess
+	$Encode::Guess::NoUTFAutoGuess = 1;
+
+	# Setup suspects for Encode::Guess based on the locale - we might also
+	# want to use our own Language pref?
+	if ($lc_ctype ne 'utf8') {
+
+		Encode::Guess->add_suspects($lc_ctype);
 	}
 
 	# Create a regex for looks_like_utf8()
@@ -763,39 +760,46 @@
 
 sub encodingFromString {
 
-	my $encoding = 'raw';
-
 	# Don't copy a potentially large string - just read it from the stack.
 	if (looks_like_ascii($_[0])) {
 
-		$encoding = 'ascii';
+		return 'ascii';
 
 	} elsif (looks_like_utf32($_[0])) {
 
-		$encoding = 'utf-32';
+		return 'utf-32';
 
 	} elsif (looks_like_utf16($_[0])) {
 	
-		$encoding = 'utf-16';
+		return 'utf-16';
 
 	} elsif (looks_like_utf8($_[0])) {
 	
-		$encoding = 'utf8';
-
-	} elsif (looks_like_latin1($_[0])) {
+		return 'utf8';
+	}
+
+	# Check Encode::Detect::Detector before ISO-8859-1, as it can find
+	# overlapping charsets.
+	if ($encodeDetect) {
+
+		my $charset = Encode::Detect::Detector::detect($_[0]);
+
+		if ($charset) {
+
+			return lc($charset);
+		}
+	}
+
+	if (looks_like_latin1($_[0])) {
 	
-		$encoding = 'iso-8859-1';
+		return 'iso-8859-1';
 
 	} elsif (looks_like_cp1252($_[0])) {
 	
-		$encoding = 'cp1252';
-
-	} elsif ($encodeDetect && $_[0] && !Encode::is_utf8($_[0])) {
-
-		$encoding  = lc(Encode::Detect::Detector::detect($_[0]));
-	}
-
-	return $encoding;
+		return 'cp1252';
+	}
+
+	return 'raw';
 }
 
 =head2 encodingFromFileHandle( $fh )



More information about the checkins mailing list