diff -r 9cd1f010c551 src/index/mimetype.cpp
--- a/src/index/mimetype.cpp	Tue Oct 11 13:31:01 2011 +0200
+++ b/src/index/mimetype.cpp	Thu Oct 13 19:29:01 2011 +0200
@@ -71,10 +71,32 @@
 	}
 	// LOGDEB(("mimetypefromdata: %s [%s]\n", result.c_str(), fn.c_str()));
 
-	// The result of 'file' execution begins with the file name
-	// which may contain spaces. We happen to know its size, so
-	// strip it:
+	// The normal output from "file -i" looks like the following:
+	//   thefilename.xxx: text/plain; charset=us-ascii
+	// Sometimes the semi-colon is missing like in:
+	//     mimetype.cpp: text/x-c charset=us-ascii
+	// And sometimes we only get the mime type. This apparently happens
+	// when 'file' believes that the file name is binary
+
+	trimstring(result, " \t\n\r");
+
+	// If there is no colon and there is a slash, this is hopefully
+	// the mime type
+	if (result.find_first_of(":") == string::npos && 
+	    result.find_first_of("/") != string::npos) {
+	    return result;
+	}
+
+	// Else the result should begin with the file name. Get rid of it:
+	if (result.find(fn) != 0) {
+	    // Garbage "file" output. Maybe the result of a charset
+	    // conversion attempt?
+	    LOGERR(("mimetype: can't interpret 'file' output: [%s]\n",
+		    result.c_str()));
+	    return string();
+	}
 	result = result.substr(fn.size());
+
 	// Now looks like ": text/plain; charset=us-ascii"
 	// Split it, and take second field
 	list<string> res;

