如何解析.apk包中的AndroidManifest.xml文件

该文件似乎是二进制XML格式。这种格式是什么?如何以编程方式解析它(而不是使用SDK中的aapt dump工具)?

这个二进制格式在这里的文档中没有讨论。

注意 :我想从Android环境以外的地方访问这些信息,最好来自Java。

使用android-apktool

有一个应用程序读取apk文件,并将XML解码为几乎原始的形式。

用法:

 apktool d Gmail.apk && cat Gmail/AndroidManifest.xml 

检查android-apktool获取更多信息

下面这个在Android上运行的Java方法记录了.apk包中AndroidManifest.xml文件的二进制格式(仅限于我目前能够解读的部分)。 第二个代码框显示如何调用decompressXML,以及如何从设备上的应用程序包文件加载byte[]。 (有些字段的用途我还不清楚,如果你知道它们的含义,请告诉我,我会更新这些信息。)

 // decompressXML -- Parse the 'compressed' binary form of Android XML docs // such as for AndroidManifest.xml in .apk files public static int endDocTag = 0x00100101; public static int startTag = 0x00100102; public static int endTag = 0x00100103; public void decompressXML(byte[] xml) { // Compressed XML file/bytes starts with 24x bytes of data, // 9 32 bit words in little endian order (LSB first): // 0th word is 03 00 08 00 // 3rd word SEEMS TO BE: Offset at then of StringTable // 4th word is: Number of strings in string table // WARNING: Sometime I indiscriminently display or refer to word in // little endian storage format, or in integer format (ie MSB first). int numbStrings = LEW(xml, 4*4); // StringIndexTable starts at offset 24x, an array of 32 bit LE offsets // of the length/string data in the StringTable. int sitOff = 0x24; // Offset of start of StringIndexTable // StringTable, each string is represented with a 16 bit little endian // character count, followed by that number of 16 bit (LE) (Unicode) chars. int stOff = sitOff + numbStrings*4; // StringTable follows StrIndexTable // XMLTags, The XML tag tree starts after some unknown content after the // StringTable. There is some unknown data after the StringTable, scan // forward from this point to the flag for the start of an XML start tag. int xmlTagOff = LEW(xml, 3*4); // Start from the offset in the 3rd word. // Scan forward until we find the bytes: 0x02011000(x00100102 in normal int) for (int ii=xmlTagOff; ii<xml.length-4; ii+=4) { if (LEW(xml, ii) == startTag) { xmlTagOff = ii; break; } } // end of hack, scanning for start of first start tag // XML tags and attributes: // Every XML start and end tag consists of 6 32 bit words: // 0th word: 02011000 for startTag and 03011000 for endTag // 1st word: a flag?, like 38000000 // 2nd word: Line of where this tag appeared in the original source file // 3rd word: FFFFFFFF ?? 
// 4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS // 5th word: StringIndex of Element Name // (Note: 01011000 in 0th word means end of XML document, endDocTag) // Start tags (not end tags) contain 3 more words: // 6th word: 14001400 meaning?? // 7th word: Number of Attributes that follow this tag(follow word 8th) // 8th word: 00000000 meaning?? // Attributes consist of 5 words: // 0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF // 1st word: StringIndex of Attribute Name // 2nd word: StringIndex of Attribute Value, or FFFFFFF if ResourceId used // 3rd word: Flags? // 4th word: str ind of attr value again, or ResourceId of value // TMP, dump string table to tr for debugging //tr.addSelect("strings", null); //for (int ii=0; ii<numbStrings; ii++) { // // Length of string starts at StringTable plus offset in StrIndTable // String str = compXmlString(xml, sitOff, stOff, ii); // tr.add(String.valueOf(ii), str); //} //tr.parent(); // Step through the XML tree element tags and attributes int off = xmlTagOff; int indent = 0; int startTagLineNo = -2; while (off < xml.length) { int tag0 = LEW(xml, off); //int tag1 = LEW(xml, off+1*4); int lineNo = LEW(xml, off+2*4); //int tag3 = LEW(xml, off+3*4); int nameNsSi = LEW(xml, off+4*4); int nameSi = LEW(xml, off+5*4); if (tag0 == startTag) { // XML START TAG int tag6 = LEW(xml, off+6*4); // Expected to be 14001400 int numbAttrs = LEW(xml, off+7*4); // Number of Attributes to follow //int tag8 = LEW(xml, off+8*4); // Expected to be 00000000 off += 9*4; // Skip over 6+3 words of startTag data String name = compXmlString(xml, sitOff, stOff, nameSi); //tr.addSelect(name, null); startTagLineNo = lineNo; // Look for the Attributes StringBuffer sb = new StringBuffer(); for (int ii=0; ii<numbAttrs; ii++) { int attrNameNsSi = LEW(xml, off); // AttrName Namespace Str Ind, or FFFFFFFF int attrNameSi = LEW(xml, off+1*4); // AttrName String Index int attrValueSi = LEW(xml, off+2*4); // AttrValue Str Ind, or 
FFFFFFFF int attrFlags = LEW(xml, off+3*4); int attrResId = LEW(xml, off+4*4); // AttrValue ResourceId or dup AttrValue StrInd off += 5*4; // Skip over the 5 words of an attribute String attrName = compXmlString(xml, sitOff, stOff, attrNameSi); String attrValue = attrValueSi!=-1 ? compXmlString(xml, sitOff, stOff, attrValueSi) : "resourceID 0x"+Integer.toHexString(attrResId); sb.append(" "+attrName+"=\""+attrValue+"\""); //tr.add(attrName, attrValue); } prtIndent(indent, "<"+name+sb+">"); indent++; } else if (tag0 == endTag) { // XML END TAG indent--; off += 6*4; // Skip over 6 words of endTag data String name = compXmlString(xml, sitOff, stOff, nameSi); prtIndent(indent, "</"+name+"> (line "+startTagLineNo+"-"+lineNo+")"); //tr.parent(); // Step back up the NobTree } else if (tag0 == endDocTag) { // END OF XML DOC TAG break; } else { prt(" Unrecognized tag code '"+Integer.toHexString(tag0) +"' at offset "+off); break; } } // end of while loop scanning tags and attributes of XML tree prt(" end at offset "+off); } // end of decompressXML public String compXmlString(byte[] xml, int sitOff, int stOff, int strInd) { if (strInd < 0) return null; int strOff = stOff + LEW(xml, sitOff+strInd*4); return compXmlStringAt(xml, strOff); } public static String spaces = " "; public void prtIndent(int indent, String str) { prt(spaces.substring(0, Math.min(indent*2, spaces.length()))+str); } // compXmlStringAt -- Return the string stored in StringTable format at // offset strOff. This offset points to the 16 bit string length, which // is followed by that number of 16 bit (Unicode) chars. public String compXmlStringAt(byte[] arr, int strOff) { int strLen = arr[strOff+1]<<8&0xff00 | arr[strOff]&0xff; byte[] chars = new byte[strLen]; for (int ii=0; ii<strLen; ii++) { chars[ii] = arr[strOff+2+ii*2]; } return new String(chars); // Hack, just use 8 byte chars } // end of compXmlStringAt // LEW -- Return value of a Little Endian 32 bit word from the byte array // at offset off. 
public int LEW(byte[] arr, int off) { return arr[off+3]<<24&0xff000000 | arr[off+2]<<16&0xff0000 | arr[off+1]<<8&0xff00 | arr[off]&0xFF; } // end of LEW 

这个方法将AndroidManifest读入一个byte []进行处理:

 public void getIntents(String path) { try { JarFile jf = new JarFile(path); InputStream is = jf.getInputStream(jf.getEntry("AndroidManifest.xml")); byte[] xml = new byte[is.available()]; int br = is.read(xml); //Tree tr = TrunkFactory.newTree(); decompressXML(xml); //prt("XML\n"+tr.list()); } catch (Exception ex) { console.log("getIntents, ex: "+ex); ex.printStackTrace(); } } // end of getIntents 

存储在 /system/app 中的大多数应用程序在我未 root 的 Evo 上是可读的;其他应用程序位于 /data/app 中,需要 root 权限才能查看。 上面的“path”参数类似于:“/system/app/Weather.apk”

那么从Android SDK使用Android资产包装工具 (aapt)到Python(或其他)脚本怎么样?

通过aapt( http://elinux.org/Android_aapt ),的确可以检索有关.apk包及其AndroidManifest.xml文件的信息。 尤其是,您可以通过“dump”子命令提取.apk包的各个元素的值。 例如,您可以通过以下方式在.apk包中的AndroidManifest.xml文件中提取用户权限

 $ aapt dump permissions package.apk 

其中package.apk是您的.apk软件包。

而且,您可以使用Unix管道命令清除输出。 例如:

 $ aapt dump permissions package.apk | sed 1d | awk '{ print $NF }' 

这里有个编程的Python脚本:

 # Print the permissions listed by `aapt dump permissions` for package.apk.
import os
import subprocess

# Current directory and file name:
curpath = os.path.dirname(os.path.realpath(__file__))
filepath = os.path.join(curpath, "package.apk")

# Extract the AndroidManifest.xml permissions.
# aapt is started with an argument list (no shell), so a path containing spaces
# or shell metacharacters is passed through unchanged.
process = subprocess.Popen(["aapt", "dump", "permissions", filepath],
                           stdout=subprocess.PIPE, stderr=None)
output = process.communicate()[0].decode("utf-8", "replace")

# Same filtering the shell pipeline did: drop the first line (sed 1d) and keep
# the last whitespace-separated field of every remaining line (awk '{ print $NF }').
permissions = "\n".join((line.split() or [""])[-1]
                        for line in output.splitlines()[1:])
print(permissions)

以类似的方式,您可以提取AndroidManifest.xml的其他信息(例如应用程序名称等):

 # Extract the APK package info (uses `subprocess` and `filepath` set up earlier).
def findBetween(s, prefix, suffix):
    """Return the text of `s` between the first `prefix` and the next `suffix`.

    Returns "" when either marker is missing.
    """
    try:
        start = s.index(prefix) + len(prefix)
        end = s.index(suffix, start)
        return s[start:end]
    except ValueError:
        return ""


# findBetween is defined above its first use; a script runs top to bottom, so
# calling it before the `def` has executed raises NameError.
process = subprocess.Popen(["aapt", "dump", "badging", filepath],
                           stdout=subprocess.PIPE, stderr=None)
apkInfo = process.communicate()[0].decode("utf-8", "replace").splitlines()
for info in apkInfo:
    # Package info:
    if "package:" in info:
        print("App Package: " + findBetween(info, "name='", "'"))
        print("App Version: " + findBetween(info, "versionName='", "'"))
        continue
    # App name:
    if "application:" in info:
        print("App Name: " + findBetween(info, "label='", "'"))
        continue

相反,如果您想解析整个AndroidManifest XML树,可以使用xmltree命令以类似的方式执行此操作:

 aapt dump xmltree package.apk AndroidManifest.xml 

像以前一样使用Python:

 # Extract the AndroidManifest XML tree (uses `subprocess` and `filepath` set up earlier).
# aapt is started with an argument list (no shell) so the path needs no quoting.
process = subprocess.Popen(["aapt", "dump", "xmltree", filepath, "AndroidManifest.xml"],
                           stdout=subprocess.PIPE, stderr=None)
xmlTree = process.communicate()[0].decode("utf-8", "replace")

# These are plain substring counts over the whole dump, so any other occurrence
# of the word (for example inside an attribute value) is counted as well.
print("Number of Activities: " + str(xmlTree.count("activity")))
print("Number of Services: " + str(xmlTree.count("service")))
print("Number of BroadcastReceivers: " + str(xmlTree.count("receiver")))

你可以使用android-random项目中开发的axml2xml.pl工具。 它将从二进制文件生成文本清单文件(AndroidManifest.xml)。

我说的是“文本”而不是“原始”,因为像许多逆向工程工具一样,这个工具并不完美,结果也不完整。 我认为它要么功能不完整,要么不向前兼容(无法处理较新的二进制编码方案)。 无论原因是什么,axml2xml.pl工具都无法正确提取所有的属性值,例如minSdkVersion、targetSdkVersion,以及基本上所有引用资源的属性(如字符串、图标等);也就是说,只有类名(Activity、Service等)能被正确提取。

但是,仍然可以通过在原始Android应用程序文件( .apk )上运行aapt工具来找到这些缺失的信息:

aapt l -a <someapp.apk>

检查以下WPF项目正确解码属性。

apk解析器, https://github.com/caoqianli/apk-parser ,一个轻量级的impl for java,没有依赖aapt或其他二进制文件,对于解析二进制xml文件和其他apk信息是很好的。

 ApkParser apkParser = new ApkParser(new File(filePath)); // set a locale to translate resource tag into specific strings in language the locale specified, you set locale to Locale.ENGLISH then get apk title 'WeChat' instead of '@string/app_name' for example apkParser.setPreferredLocale(locale); String xml = apkParser.getManifestXml(); System.out.println(xml); String xml2 = apkParser.transBinaryXml(xmlPathInApk); System.out.println(xml2); ApkMeta apkMeta = apkParser.getApkMeta(); System.out.println(apkMeta); Set<Locale> locales = apkParser.getLocales(); for (Locale l : locales) { System.out.println(l); } apkParser.close(); 

如果有用的话,下面是由Ribo发布的Java代码片段的C ++版本:

 struct decompressXML { // decompressXML -- Parse the 'compressed' binary form of Android XML docs // such as for AndroidManifest.xml in .apk files enum { endDocTag = 0x00100101, startTag = 0x00100102, endTag = 0x00100103 }; decompressXML(const BYTE* xml, int cb) { // Compressed XML file/bytes starts with 24x bytes of data, // 9 32 bit words in little endian order (LSB first): // 0th word is 03 00 08 00 // 3rd word SEEMS TO BE: Offset at then of StringTable // 4th word is: Number of strings in string table // WARNING: Sometime I indiscriminently display or refer to word in // little endian storage format, or in integer format (ie MSB first). int numbStrings = LEW(xml, cb, 4*4); // StringIndexTable starts at offset 24x, an array of 32 bit LE offsets // of the length/string data in the StringTable. int sitOff = 0x24; // Offset of start of StringIndexTable // StringTable, each string is represented with a 16 bit little endian // character count, followed by that number of 16 bit (LE) (Unicode) chars. int stOff = sitOff + numbStrings*4; // StringTable follows StrIndexTable // XMLTags, The XML tag tree starts after some unknown content after the // StringTable. There is some unknown data after the StringTable, scan // forward from this point to the flag for the start of an XML start tag. int xmlTagOff = LEW(xml, cb, 3*4); // Start from the offset in the 3rd word. // Scan forward until we find the bytes: 0x02011000(x00100102 in normal int) for (int ii=xmlTagOff; ii<cb-4; ii+=4) { if (LEW(xml, cb, ii) == startTag) { xmlTagOff = ii; break; } } // end of hack, scanning for start of first start tag // XML tags and attributes: // Every XML start and end tag consists of 6 32 bit words: // 0th word: 02011000 for startTag and 03011000 for endTag // 1st word: a flag?, like 38000000 // 2nd word: Line of where this tag appeared in the original source file // 3rd word: FFFFFFFF ?? 
// 4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS // 5th word: StringIndex of Element Name // (Note: 01011000 in 0th word means end of XML document, endDocTag) // Start tags (not end tags) contain 3 more words: // 6th word: 14001400 meaning?? // 7th word: Number of Attributes that follow this tag(follow word 8th) // 8th word: 00000000 meaning?? // Attributes consist of 5 words: // 0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF // 1st word: StringIndex of Attribute Name // 2nd word: StringIndex of Attribute Value, or FFFFFFF if ResourceId used // 3rd word: Flags? // 4th word: str ind of attr value again, or ResourceId of value // TMP, dump string table to tr for debugging //tr.addSelect("strings", null); //for (int ii=0; ii<numbStrings; ii++) { // // Length of string starts at StringTable plus offset in StrIndTable // String str = compXmlString(xml, sitOff, stOff, ii); // tr.add(String.valueOf(ii), str); //} //tr.parent(); // Step through the XML tree element tags and attributes int off = xmlTagOff; int indent = 0; int startTagLineNo = -2; while (off < cb) { int tag0 = LEW(xml, cb, off); //int tag1 = LEW(xml, off+1*4); int lineNo = LEW(xml, cb, off+2*4); //int tag3 = LEW(xml, off+3*4); int nameNsSi = LEW(xml, cb, off+4*4); int nameSi = LEW(xml, cb, off+5*4); if (tag0 == startTag) { // XML START TAG int tag6 = LEW(xml, cb, off+6*4); // Expected to be 14001400 int numbAttrs = LEW(xml, cb, off+7*4); // Number of Attributes to follow //int tag8 = LEW(xml, off+8*4); // Expected to be 00000000 off += 9*4; // Skip over 6+3 words of startTag data std::string name = compXmlString(xml, cb, sitOff, stOff, nameSi); //tr.addSelect(name, null); startTagLineNo = lineNo; // Look for the Attributes std::string sb; for (int ii=0; ii<numbAttrs; ii++) { int attrNameNsSi = LEW(xml, cb, off); // AttrName Namespace Str Ind, or FFFFFFFF int attrNameSi = LEW(xml, cb, off+1*4); // AttrName String Index int attrValueSi = LEW(xml, cb, off+2*4); // 
AttrValue Str Ind, or FFFFFFFF int attrFlags = LEW(xml, cb, off+3*4); int attrResId = LEW(xml, cb, off+4*4); // AttrValue ResourceId or dup AttrValue StrInd off += 5*4; // Skip over the 5 words of an attribute std::string attrName = compXmlString(xml, cb, sitOff, stOff, attrNameSi); std::string attrValue = attrValueSi!=-1 ? compXmlString(xml, cb, sitOff, stOff, attrValueSi) : "resourceID 0x"+toHexString(attrResId); sb.append(" "+attrName+"=\""+attrValue+"\""); //tr.add(attrName, attrValue); } prtIndent(indent, "<"+name+sb+">"); indent++; } else if (tag0 == endTag) { // XML END TAG indent--; off += 6*4; // Skip over 6 words of endTag data std::string name = compXmlString(xml, cb, sitOff, stOff, nameSi); prtIndent(indent, "</"+name+"> (line "+toIntString(startTagLineNo)+"-"+toIntString(lineNo)+")"); //tr.parent(); // Step back up the NobTree } else if (tag0 == endDocTag) { // END OF XML DOC TAG break; } else { prt(" Unrecognized tag code '"+toHexString(tag0) +"' at offset "+toIntString(off)); break; } } // end of while loop scanning tags and attributes of XML tree prt(" end at offset "+off); } // end of decompressXML std::string compXmlString(const BYTE* xml, int cb, int sitOff, int stOff, int strInd) { if (strInd < 0) return std::string(""); int strOff = stOff + LEW(xml, cb, sitOff+strInd*4); return compXmlStringAt(xml, cb, strOff); } void prt(std::string str) { printf("%s", str.c_str()); } void prtIndent(int indent, std::string str) { char spaces[46]; memset(spaces, ' ', sizeof(spaces)); spaces[min(indent*2, sizeof(spaces) - 1)] = 0; prt(spaces); prt(str); prt("\n"); } // compXmlStringAt -- Return the string stored in StringTable format at // offset strOff. This offset points to the 16 bit string length, which // is followed by that number of 16 bit (Unicode) chars. 
std::string compXmlStringAt(const BYTE* arr, int cb, int strOff) { if (cb < strOff + 2) return std::string(""); int strLen = arr[strOff+1]<<8&0xff00 | arr[strOff]&0xff; char* chars = new char[strLen + 1]; chars[strLen] = 0; for (int ii=0; ii<strLen; ii++) { if (cb < strOff + 2 + ii * 2) { chars[ii] = 0; break; } chars[ii] = arr[strOff+2+ii*2]; } std::string str(chars); free(chars); return str; } // end of compXmlStringAt // LEW -- Return value of a Little Endian 32 bit word from the byte array // at offset off. int LEW(const BYTE* arr, int cb, int off) { return (cb > off + 3) ? ( arr[off+3]<<24&0xff000000 | arr[off+2]<<16&0xff0000 | arr[off+1]<<8&0xff00 | arr[off]&0xFF ) : 0; } // end of LEW std::string toHexString(DWORD attrResId) { char ch[20]; sprintf_s(ch, 20, "%lx", attrResId); return std::string(ch); } std::string toIntString(int i) { char ch[20]; sprintf_s(ch, 20, "%ld", i); return std::string(ch); } }; 

如果你使用Python或Androguard,Androguard的androaxml功能可以为你完成这个转换。 该功能在这篇博客文章中有详细介绍,附加文档在这里,源代码在这里。

用法:

 $ ./androaxml.py -h Usage: androaxml.py [options] Options: -h, --help show this help message and exit -i INPUT, --input=INPUT filename input (APK or android's binary xml) -o OUTPUT, --output=OUTPUT filename output of the xml -v, --version version of the API $ ./androaxml.py -i yourfile.apk -o output.xml $ ./androaxml.py -i AndroidManifest.xml -o output.xml 

作为参考,这是我修改的Ribo代码版本。 主要的区别是decompressXML()直接返回一个字符串,对我来说这种用法更合适。

注意:我使用Ribo解决方案的唯一目的是从Manifest XML文件中获取一个.APK文件的已发布版本,我确认为此目的,它的工作非常好。

编辑[2013-03-16]: 如果将版本设置为纯文本,它会很好地工作,但是如果设置为引用资源XML,则会显示为“资源0x1”。 在这种情况下,您可能必须将此解决方案耦合到另一个将获取正确的字符串资源引用的解决方案。

 /** * Binary XML doc ending Tag */ public static int endDocTag = 0x00100101; /** * Binary XML start Tag */ public static int startTag = 0x00100102; /** * Binary XML end Tag */ public static int endTag = 0x00100103; /** * Reference var for spacing * Used in prtIndent() */ public static String spaces = " "; /** * Parse the 'compressed' binary form of Android XML docs * such as for AndroidManifest.xml in .apk files * Source: http://stackoverflow.com/questions/2097813/how-to-parse-the-androidmanifest-xml-file-inside-an-apk-package/4761689#4761689 * * @param xml Encoded XML content to decompress */ public static String decompressXML(byte[] xml) { StringBuilder resultXml = new StringBuilder(); // Compressed XML file/bytes starts with 24x bytes of data, // 9 32 bit words in little endian order (LSB first): // 0th word is 03 00 08 00 // 3rd word SEEMS TO BE: Offset at then of StringTable // 4th word is: Number of strings in string table // WARNING: Sometime I indiscriminently display or refer to word in // little endian storage format, or in integer format (ie MSB first). int numbStrings = LEW(xml, 4*4); // StringIndexTable starts at offset 24x, an array of 32 bit LE offsets // of the length/string data in the StringTable. int sitOff = 0x24; // Offset of start of StringIndexTable // StringTable, each string is represented with a 16 bit little endian // character count, followed by that number of 16 bit (LE) (Unicode) chars. int stOff = sitOff + numbStrings*4; // StringTable follows StrIndexTable // XMLTags, The XML tag tree starts after some unknown content after the // StringTable. There is some unknown data after the StringTable, scan // forward from this point to the flag for the start of an XML start tag. int xmlTagOff = LEW(xml, 3*4); // Start from the offset in the 3rd word. 
// Scan forward until we find the bytes: 0x02011000(x00100102 in normal int) for (int ii=xmlTagOff; ii<xml.length-4; ii+=4) { if (LEW(xml, ii) == startTag) { xmlTagOff = ii; break; } } // end of hack, scanning for start of first start tag // XML tags and attributes: // Every XML start and end tag consists of 6 32 bit words: // 0th word: 02011000 for startTag and 03011000 for endTag // 1st word: a flag?, like 38000000 // 2nd word: Line of where this tag appeared in the original source file // 3rd word: FFFFFFFF ?? // 4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS // 5th word: StringIndex of Element Name // (Note: 01011000 in 0th word means end of XML document, endDocTag) // Start tags (not end tags) contain 3 more words: // 6th word: 14001400 meaning?? // 7th word: Number of Attributes that follow this tag(follow word 8th) // 8th word: 00000000 meaning?? // Attributes consist of 5 words: // 0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF // 1st word: StringIndex of Attribute Name // 2nd word: StringIndex of Attribute Value, or FFFFFFF if ResourceId used // 3rd word: Flags? 
// 4th word: str ind of attr value again, or ResourceId of value // TMP, dump string table to tr for debugging //tr.addSelect("strings", null); //for (int ii=0; ii<numbStrings; ii++) { // // Length of string starts at StringTable plus offset in StrIndTable // String str = compXmlString(xml, sitOff, stOff, ii); // tr.add(String.valueOf(ii), str); //} //tr.parent(); // Step through the XML tree element tags and attributes int off = xmlTagOff; int indent = 0; int startTagLineNo = -2; while (off < xml.length) { int tag0 = LEW(xml, off); //int tag1 = LEW(xml, off+1*4); int lineNo = LEW(xml, off+2*4); //int tag3 = LEW(xml, off+3*4); int nameNsSi = LEW(xml, off+4*4); int nameSi = LEW(xml, off+5*4); if (tag0 == startTag) { // XML START TAG int tag6 = LEW(xml, off+6*4); // Expected to be 14001400 int numbAttrs = LEW(xml, off+7*4); // Number of Attributes to follow //int tag8 = LEW(xml, off+8*4); // Expected to be 00000000 off += 9*4; // Skip over 6+3 words of startTag data String name = compXmlString(xml, sitOff, stOff, nameSi); //tr.addSelect(name, null); startTagLineNo = lineNo; // Look for the Attributes StringBuffer sb = new StringBuffer(); for (int ii=0; ii<numbAttrs; ii++) { int attrNameNsSi = LEW(xml, off); // AttrName Namespace Str Ind, or FFFFFFFF int attrNameSi = LEW(xml, off+1*4); // AttrName String Index int attrValueSi = LEW(xml, off+2*4); // AttrValue Str Ind, or FFFFFFFF int attrFlags = LEW(xml, off+3*4); int attrResId = LEW(xml, off+4*4); // AttrValue ResourceId or dup AttrValue StrInd off += 5*4; // Skip over the 5 words of an attribute String attrName = compXmlString(xml, sitOff, stOff, attrNameSi); String attrValue = attrValueSi!=-1 ? 
compXmlString(xml, sitOff, stOff, attrValueSi) : "resourceID 0x"+Integer.toHexString(attrResId); sb.append(" "+attrName+"=\""+attrValue+"\""); //tr.add(attrName, attrValue); } resultXml.append(prtIndent(indent, "<"+name+sb+">")); indent++; } else if (tag0 == endTag) { // XML END TAG indent--; off += 6*4; // Skip over 6 words of endTag data String name = compXmlString(xml, sitOff, stOff, nameSi); resultXml.append(prtIndent(indent, "</"+name+"> (line "+startTagLineNo+"-"+lineNo+")")); //tr.parent(); // Step back up the NobTree } else if (tag0 == endDocTag) { // END OF XML DOC TAG break; } else { Log.e(TAG, " Unrecognized tag code '"+Integer.toHexString(tag0) +"' at offset "+off); break; } } // end of while loop scanning tags and attributes of XML tree Log.i(TAG, " end at offset "+off); return resultXml.toString(); } // end of decompressXML /** * Tool Method for decompressXML(); * Compute binary XML to its string format * Source: Source: http://stackoverflow.com/questions/2097813/how-to-parse-the-androidmanifest-xml-file-inside-an-apk-package/4761689#4761689 * * @param xml Binary-formatted XML * @param sitOff * @param stOff * @param strInd * @return String-formatted XML */ public static String compXmlString(byte[] xml, int sitOff, int stOff, int strInd) { if (strInd < 0) return null; int strOff = stOff + LEW(xml, sitOff+strInd*4); return compXmlStringAt(xml, strOff); } /** * Tool Method for decompressXML(); * Apply indentation * * @param indent Indentation level * @param str String to indent * @return Indented string */ public static String prtIndent(int indent, String str) { return (spaces.substring(0, Math.min(indent*2, spaces.length()))+str); } /** * Tool method for decompressXML() * Return the string stored in StringTable format at * offset strOff. This offset points to the 16 bit string length, which * is followed by that number of 16 bit (Unicode) chars. 
* * @param arr StringTable array * @param strOff Offset to get string from * @return String from StringTable at offset strOff * */ public static String compXmlStringAt(byte[] arr, int strOff) { int strLen = arr[strOff+1]<<8&0xff00 | arr[strOff]&0xff; byte[] chars = new byte[strLen]; for (int ii=0; ii<strLen; ii++) { chars[ii] = arr[strOff+2+ii*2]; } return new String(chars); // Hack, just use 8 byte chars } // end of compXmlStringAt /** * Return value of a Little Endian 32 bit word from the byte array * at offset off. * * @param arr Byte array with 32 bit word * @param off Offset to get word from * @return Value of Little Endian 32 bit word specified */ public static int LEW(byte[] arr, int off) { return arr[off+3]<<24&0xff000000 | arr[off+2]<<16&0xff0000 | arr[off+1]<<8&0xff00 | arr[off]&0xFF; } // end of LEW 

Hope it can help other people too.

In Android studio 2.2 you can directly analyze the apk. Goto build- analyze apk. Select the apk, navigate to androidmanifest.xml. You can see the details of androidmanifest.

I found the AXMLPrinter2, a Java app over at the Android4Me project to work fine on the AndroidManifest.xml that I had (and prints the XML out in a nicely formatted way). http://code.google.com/p/android4me/downloads/detail?name=AXMLPrinter2.jar

One note.. it (and the code on this answer from Ribo) doesn't appear to handle every compiled XML file that I've come across. I found one where the strings were stored with one byte per character, rather than the double byte format that it assumes.

it can be helpful

 public static int vCodeApk(String path) { PackageManager pm = G.context.getPackageManager(); PackageInfo info = pm.getPackageArchiveInfo(path, 0); return info.versionCode; // Toast.makeText(this, "VersionCode : " + info.versionCode + ", VersionName : " + info.versionName, Toast.LENGTH_LONG).show(); } 

G is my Application class :

 public class G extends Application {