diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml index 5c5523f47c3..1d181779891 100644 --- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml +++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml @@ -875,13 +875,24 @@ - - + + + + + + + - - + + + + + + + + @@ -911,6 +922,15 @@ + + + + + + @@ -4847,6 +4867,7 @@ + diff --git a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java index f52482c8d78..0752731686e 100644 --- a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java +++ b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java @@ -105,7 +105,7 @@ public void testHttpServerFileExtensions() { assertEquals("application/pics-rules", tika.detect("x.prf")); assertEquals("application/pkcs10", tika.detect("x.p10")); assertEquals("application/pkcs7-mime", tika.detect("x.p7m")); - assertEquals("application/pkcs7-mime", tika.detect("x.p7c")); + assertEquals("application/pkcs7-mime; smime-type=certs-only", tika.detect("x.p7c")); assertEquals("application/pkcs7-signature", tika.detect("x.p7s")); assertEquals("application/pkix-cert", tika.detect("x.cer")); assertEquals("application/pkix-crl", tika.detect("x.crl")); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/ASN1Detector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/ASN1Detector.java new file mode 100644 index 00000000000..6c26022af43 --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/ASN1Detector.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more 
+ * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.detect.crypto; + +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; + +import org.bouncycastle.asn1.ASN1Encodable; +import org.bouncycastle.asn1.ASN1InputStream; +import org.bouncycastle.asn1.ASN1Integer; +import org.bouncycastle.asn1.ASN1ObjectIdentifier; +import org.bouncycastle.asn1.ASN1OctetString; +import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.ASN1Sequence; +import org.bouncycastle.asn1.ASN1Set; +import org.bouncycastle.asn1.ASN1TaggedObject; +import org.bouncycastle.asn1.DLTaggedObject; + +import org.apache.tika.config.Field; +import org.apache.tika.detect.Detector; +import org.apache.tika.io.BoundedInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; + +/** + * This is a very limited asn1 detector that focuses on pkcs and timestamped-data (so far) + */ +public class ASN1Detector implements Detector { + + private static final String DATA_OID = "1.2.840.113549.1.7.1"; + + private static final Map ENVELOPED = Map.of("smime-type", "enveloped-data"); + private static final Map SIGNED = Map.of("smime-type", "signed-data"); + private static final Map CERTS_ONLY = Map.of("smime-type", 
"certs-only"); + private static final Map COMPRESSED = Map.of("smime-type", "compressed-data"); + + + private static final long serialVersionUID = -8414458255467101503L; + private static final MediaType PKCS12_MEDIA_TYPE = MediaType.application("x-pkcs12"); + private static final MediaType PKCS7_ENVELOPED = new MediaType("application", "pkcs7-mime", ENVELOPED); + private static final MediaType PKCS7_SIGNED = new MediaType("application", "pkcs7-mime", SIGNED); + private static final MediaType PKCS7_CERTS_ONLY = new MediaType("application", "pkcs7-mime", CERTS_ONLY); + private static final MediaType PKCS7_COMPRESSED = new MediaType("application", "pkcs7-mime", COMPRESSED); + private static final MediaType PKCS7_SIGNATURE_ONLY = MediaType.application("pkcs7-signature"); + + //not pkcs7 at all, but shares magic with compressed pkcs7 + private static final MediaType TIME_STAMPED_DATA = MediaType.application("timestamped-data"); + + private int markLimit = 1000000; + + @Override + public MediaType detect(InputStream input, Metadata metadata) throws IOException { + if (input == null) { + return null; + } + try { + input.mark(2); + int b = input.read(); + if (b != 0x30) { + return null; + } + b = input.read(); + if (b < 0x7A || b > 0x84) { + return null; + } + } finally { + input.reset(); + } + PKCSFeatures pkcsFeatures = new PKCSFeatures(); + BoundedInputStream bis = new BoundedInputStream(markLimit, input); + bis.mark(markLimit); + try { + ASN1InputStream asn1InputStream = new ASN1InputStream(bis); + ASN1Primitive root = null; + if ((root = asn1InputStream.readObject()) != null) { + handleRootNode(root, pkcsFeatures); + if (pkcsFeatures.primaryType == PKCSFeatures.PRIMARY_TYPE.TIME_STAMPED_DATA) { + return TIME_STAMPED_DATA; + } else if (pkcsFeatures.looksLikePKCS12) { + return PKCS12_MEDIA_TYPE; + } else if (pkcsFeatures.primaryType == PKCSFeatures.PRIMARY_TYPE.ENVELOPED_DATA) { + return PKCS7_ENVELOPED; + } else if (pkcsFeatures.primaryType == 
PKCSFeatures.PRIMARY_TYPE.COMPRESSED) { + return PKCS7_COMPRESSED; + } else if (pkcsFeatures.primaryType == PKCSFeatures.PRIMARY_TYPE.SIGNED_DATA) { + if (pkcsFeatures.hasData) { + return PKCS7_SIGNED; + } else if (pkcsFeatures.hasCerts) { + return PKCS7_CERTS_ONLY; + } else { + return PKCS7_SIGNATURE_ONLY; + } + } + } + } catch (IOException e) { + e.printStackTrace(); + //swallow + } finally { + bis.reset(); + } + return null; + } + + private void handleRootNode(ASN1Primitive root, PKCSFeatures pkcsFeatures) throws IOException { + String oid = null; + ASN1TaggedObject taggedObject = null; + if (!(root instanceof ASN1Sequence)) { + return; + } + ASN1Sequence seq = (ASN1Sequence) root; + //try for pkcs12 + if (seq.size() == 3) { + tryPKCS12(seq, pkcsFeatures); + if (pkcsFeatures.looksLikePKCS12) { + return; + } + } + for (ASN1Encodable c : ((ASN1Sequence) root)) { + if (c instanceof ASN1ObjectIdentifier) { + oid = ((ASN1ObjectIdentifier) c).toString(); + } else if (c instanceof ASN1TaggedObject) { + taggedObject = (ASN1TaggedObject) c; + } + } + PKCSFeatures.PRIMARY_TYPE type = PKCSFeatures.lookup(oid); + pkcsFeatures.primaryType = type; + if (type == PKCSFeatures.PRIMARY_TYPE.UNKNOWN) { + return; + } else if (type == PKCSFeatures.PRIMARY_TYPE.TIME_STAMPED_DATA) { + return; + } + if (taggedObject != null) { + handleNode(taggedObject, pkcsFeatures); + } + } + + private void tryPKCS12(ASN1Sequence seq, ASN1Detector.PKCSFeatures pkcsFeatures) { + //This could much more rigorous -- see TIKA-3784 + + //require version 3 as the first value + ASN1Encodable obj0 = seq.getObjectAt(0); + if (! (obj0 instanceof ASN1Integer)) { + return; + } + if (((ASN1Integer)obj0).getValue().intValue() != 3) { + return; + } + //require two sequences + if (! (seq.getObjectAt(1) instanceof ASN1Sequence) || + ! 
(seq.getObjectAt(2) instanceof ASN1Sequence)) { + return; + } + //first sequence must have a data type oid as its first element + ASN1Sequence seq1 = (ASN1Sequence) seq.getObjectAt(1); + if (seq1.size() < 2) { + return; + } + if (! (seq1.getObjectAt(0) instanceof ASN1ObjectIdentifier)) { + return; + } + if (! DATA_OID.equals(((ASN1ObjectIdentifier)seq1.getObjectAt(0)).getId())) { + return; + } + //and a tagged object as its second + //if you parse the tagged object and iterate through its children + //you should eventually find oids starting with "1.2.840.113549.1.12.*" + if (! (seq1.getObjectAt(1) instanceof DLTaggedObject)) { + return; + } + pkcsFeatures.looksLikePKCS12 = true; + } + + private void handleSequence(ASN1Sequence seq, PKCSFeatures pkcsFeatures) throws IOException { + if (seq.size() == 0) { + return; + } + if (isCert(seq)) { + pkcsFeatures.hasCerts = true; + return; + } + if (hasSignedData(seq)) { + pkcsFeatures.hasData = true; + return; + } + + + } + + private boolean isCert(ASN1Sequence seq) { + if (seq.size() != 6) { + return false; + } + //do more + //e.g. check for sequence in seq.get(2) and make sure there's a data oid there + return true; + } + + private boolean hasSignedData(ASN1Sequence seq) { + if (seq.size() != 5) { + return false; + } + //data should be a sequence in position 2 + ASN1Encodable dataSequence = seq.getObjectAt(2); + if (! (dataSequence instanceof ASN1Sequence)) { + return false; + } + if (((ASN1Sequence) dataSequence).size() < 1) { + return false; + } + ASN1Encodable obj0 = ((ASN1Sequence) dataSequence).getObjectAt(0); + if (obj0 instanceof ASN1ObjectIdentifier) { + if (DATA_OID.equals(((ASN1ObjectIdentifier) obj0).getId())) { + //TODO -- check for null or actual data? 
+ if (((ASN1Sequence) dataSequence).size() > 1) { + return true; + } + } + } + return false; + } + + private void handleNode(ASN1Primitive primitive, PKCSFeatures pkcsFeatures) throws IOException { + if (primitive instanceof ASN1Sequence) { + handleSequence((ASN1Sequence) primitive, pkcsFeatures); + } else if (primitive instanceof ASN1TaggedObject) { + handleTagged((ASN1TaggedObject) primitive, pkcsFeatures); + } else if (primitive instanceof ASN1OctetString) { + ASN1OctetString octetString = (ASN1OctetString) primitive; + try { + ASN1Primitive newP = ASN1Primitive.fromByteArray(octetString.getOctets()); + handleNode(newP, pkcsFeatures); + } catch (IOException e) { + //swallow + + } + } else if (primitive instanceof ASN1ObjectIdentifier) { + ASN1ObjectIdentifier oid = (ASN1ObjectIdentifier) primitive; + + } else if (primitive instanceof ASN1Set) { + for (ASN1Encodable obj : ((ASN1Set)primitive)) { + handleNode(obj.toASN1Primitive(), pkcsFeatures); + } + } + } + + private void handleTagged(ASN1TaggedObject tagged, PKCSFeatures pkcsFeatures) throws IOException { + handleNode(tagged.getBaseObject().toASN1Primitive(), pkcsFeatures); + } + + @Field + public void setMarkLimit(int markLimit) { + this.markLimit = markLimit; + } + + private static class PKCSFeatures { + enum PRIMARY_TYPE { + SIGNED_DATA("1.2.840.113549.1.7.2"), ENVELOPED_DATA("1.2.840.113549.1.7.3"), + SIGNED_AND_ENVELOPED_DATA("1.2.840.113549.1.7.4"), + DIGESTED_DATA("1.2.840.113549.1.7.5"), + ENCRYPTED_DATA("1.2.840.113549.1.7.6"), COMPRESSED("1.2.840.113549.1.9.16.1.9"), + TIME_STAMPED_DATA("1.2.840.113549.1.9.16.1.31"), UNKNOWN("UNKNOWN"); + private final String oid; + + PRIMARY_TYPE(String oid) { + this.oid = oid; + } + } + + private static Map TYPE_LOOKUP = new HashMap<>(); + static { + for (PRIMARY_TYPE t : PRIMARY_TYPE.values()) { + if (t == PRIMARY_TYPE.UNKNOWN) { + continue; + } + TYPE_LOOKUP.put(t.oid, t); + } + } + private PRIMARY_TYPE primaryType = PRIMARY_TYPE.UNKNOWN; + private boolean 
hasData; + private boolean hasCerts; + private boolean hasSignature; + private boolean looksLikePKCS12; + + static PRIMARY_TYPE lookup(String oid) { + if (TYPE_LOOKUP.containsKey(oid)) { + return TYPE_LOOKUP.get(oid); + } + return PRIMARY_TYPE.UNKNOWN; + } + + @Override + public String toString() { + return "PKCSFeatures{" + "primaryType=" + primaryType + ", hasData=" + hasData + ", hasCerts=" + hasCerts + ", hasSignature=" + hasSignature + ", hasPKCS12Oid=" + + looksLikePKCS12 + '}'; + } + } +} diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/dev/ASN1Dumper.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/dev/ASN1Dumper.java new file mode 100644 index 00000000000..08b73d44dd5 --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/dev/ASN1Dumper.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.detect.crypto.dev; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.bouncycastle.asn1.ASN1Boolean; +import org.bouncycastle.asn1.ASN1Encodable; +import org.bouncycastle.asn1.ASN1InputStream; +import org.bouncycastle.asn1.ASN1Integer; +import org.bouncycastle.asn1.ASN1Null; +import org.bouncycastle.asn1.ASN1ObjectIdentifier; +import org.bouncycastle.asn1.ASN1OctetString; +import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.ASN1Sequence; +import org.bouncycastle.asn1.ASN1Set; +import org.bouncycastle.asn1.ASN1TaggedObject; +import org.bouncycastle.asn1.DERIA5String; +import org.bouncycastle.asn1.DERPrintableString; +
/**
 * Development aid that prints the structure of the first ASN.1 object in a file to
 * standard out, one node per line, indented by nesting depth.
 */
public class ASN1Dumper {

    //maximum number of characters shown for an octet string that is not nested ASN.1
    private static final int FAILED_PREVIEW_LENGTH = 10;

    /**
     * @param args a single argument: the path of the file to dump
     * @throws Exception if the file cannot be read or its first object cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: ASN1Dumper <file>");
            return;
        }
        Path p = Paths.get(args[0]);
        try (InputStream is = Files.newInputStream(p);
                ASN1InputStream asn1InputStream = new ASN1InputStream(is)) {
            handleNode(asn1InputStream.readObject(), 0);
        }
    }

    /**
     * Prints one node and recurses into containers. Octet strings are re-parsed as
     * ASN.1; if that fails only a short preview of the string is printed.
     */
    private static void handleNode(ASN1Primitive primitive, int depth) {
        String indent = d(depth);
        if (primitive == null) {
            //empty input, or an empty octet string, yields no object at all
            System.out.println(indent + "<no object>");
        } else if (primitive instanceof ASN1Sequence) {
            handleSequence((ASN1Sequence) primitive, depth);
        } else if (primitive instanceof ASN1TaggedObject) {
            handleTagged((ASN1TaggedObject) primitive, depth);
        } else if (primitive instanceof ASN1Integer) {
            //print the full value; serial numbers routinely exceed the int range
            System.out.println(indent + "Integer: " + ((ASN1Integer) primitive).getValue());
        } else if (primitive instanceof ASN1OctetString) {
            ASN1OctetString octetString = (ASN1OctetString) primitive;
            try {
                handleNode(ASN1Primitive.fromByteArray(octetString.getOctets()), depth);
            } catch (IOException e) {
                String text = octetString.toString();
                int end = Math.min(FAILED_PREVIEW_LENGTH, text.length());
                System.out.println(indent + "FAILED: " + text.substring(0, end));
            }
        } else if (primitive instanceof ASN1ObjectIdentifier) {
            System.out.println(indent + "OID: " + ((ASN1ObjectIdentifier) primitive).toString());
        } else if (primitive instanceof ASN1Set) {
            for (ASN1Encodable member : (ASN1Set) primitive) {
                handleNode(member.toASN1Primitive(), depth + 1);
            }
        } else if (primitive instanceof ASN1Null) {
            System.out.println(indent + "NULL");
        } else if (primitive instanceof DERIA5String) {
            System.out.println(indent + ((DERIA5String) primitive).getString());
        } else if (primitive instanceof DERPrintableString) {
            System.out.println(indent + ((DERPrintableString) primitive).getString());
        } else if (primitive instanceof ASN1Boolean) {
            System.out.println(indent + ((ASN1Boolean) primitive).toString());
        } else {
            System.out.println(indent + "Not handling " + primitive.getClass());
        }
    }

    /**
     * Prints the element count, then every element one level deeper.
     */
    private static void handleSequence(ASN1Sequence seq, int depth) {
        System.out.println(d(depth) + "seq size: " + seq.size());
        for (ASN1Encodable element : seq) {
            handleNode(element.toASN1Primitive(), depth + 1);
        }
    }

    /**
     * Prints the class of the wrapped object, then the object itself one level deeper.
     */
    private static void handleTagged(ASN1TaggedObject tagged, int depth) {
        ASN1Encodable base = tagged.getBaseObject();
        System.out.println(d(depth) + "handling tagged " + base.getClass());
        handleNode(base.toASN1Primitive(), depth + 1);
    }

    /**
     * @return the indentation for the given depth
     */
    private static String d(int depth) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            sb.append(" ");
        }
        return sb.toString();
    }
}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java index 0c5ade3681f..973104444d2 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java @@ -63,6 +63,7 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, try { DigestCalculatorProvider digestCalculatorProvider = new JcaDigestCalculatorProviderBuilder().setProvider("BC").build(); + CMSSignedDataParser parser = new CMSSignedDataParser(digestCalculatorProvider, CloseShieldInputStream.wrap(stream)); try { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java index 2a0e4a0f957..357d37947e0 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java @@ -24,7 +24,6 @@ import java.security.NoSuchProviderException; import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -81,7 +80,7 @@ public class TSDParser implements Parser { private static final String TSD_TSA = "TSA"; private static final String TSD_ALGORITHM = "Algorithm"; private static final Set SUPPORTED_TYPES = - 
Collections.singleton(MediaType.application("timestamped-data")); + Set.of(MediaType.application("timestamped-data"), MediaType.application("x-tika-compressed-pkc7-base")); @Override public Set getSupportedTypes(ParseContext context) { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector new file mode 100644 index 00000000000..5cb880c5777 --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.tika.detect.crypto.ASN1Detector \ No newline at end of file diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java index 42761d3de2c..ecdaef4f92f 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java @@ -22,6 +22,7 @@ import java.io.InputStream; +import org.junit.jupiter.api.Test; import org.xml.sax.ContentHandler; import org.apache.tika.TikaTest; @@ -31,6 +32,8 @@ import org.apache.tika.sax.BodyContentHandler; public class Pkcs7ParserTest extends TikaTest { + + @Test public void testDetachedSignature() throws Exception { try (InputStream input = getResourceAsStream("/test-documents/testDetached.p7s")) { ContentHandler handler = new BodyContentHandler(); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/test.xml.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/test.xml.p7m new file mode 100644 index 00000000000..ce21cc961f1 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/test.xml.p7m differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_def.p7c 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_def.p7c new file mode 100644 index 00000000000..5f25350c486 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_def.p7c differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_ind.p7c b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_ind.p7c new file mode 100644 index 00000000000..c0bc161e84c Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_ind.p7c differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_long.p7z b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_long.p7z new file mode 100644 index 00000000000..74474876ed6 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_long.p7z differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_short.p7z b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_short.p7z new file mode 100644 index 00000000000..da9fbfaf1be Binary files /dev/null and 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_short.p7z differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_ind.p7z b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_ind.p7z new file mode 100644 index 00000000000..6ec9716b65f Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_ind.p7z differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_def.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_def.p7m new file mode 100644 index 00000000000..dad330064d7 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_def.p7m differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_ind.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_ind.p7m new file mode 100644 index 00000000000..eb182562c87 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_ind.p7m differ diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signature_def.p7s b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signature_def.p7s new file mode 100644 index 00000000000..b90e41ea171 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signature_def.p7s differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signature_ind.p7s b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signature_ind.p7s new file mode 100644 index 00000000000..dd9400a9b40 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signature_ind.p7s differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.p7m new file mode 100644 index 00000000000..1f0adb74993 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.p7m differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.pem 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.pem new file mode 100644 index 00000000000..34ef17fd53d --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.pem @@ -0,0 +1,35 @@ +-----BEGIN PKCS7----- +MIIGIgYJKoZIhvcNAQcCoIIGEzCCBg8CAQExDTALBglghkgBZQMEAgEwEwYJKoZI +hvcNAQcBoAYEBHRleHSgggPDMIIDvzCCAyigAwIBAgICAKEwDQYJKoZIhvcNAQEF +BQAwcjELMAkGA1UEBhMCRVMxDzANBgNVBAgTBk1hZHJpZDEPMA0GA1UEBxMGTWFk +cmlkMQ4wDAYDVQQKEwVNSVR5QzEbMBkGA1UECxMSTUlUeUMgRE5JZSBQcnVlYmFz +MRQwEgYDVQQDEwtDQSB1c3VhcmlvczAeFw0xMDA0MDgxMjM4MjJaFw0yMDA0MDUx +MjM4MjJaMHwxCzAJBgNVBAYTAkVTMQ8wDQYDVQQIEwZNYWRyaWQxDzANBgNVBAcT +Bk1hZHJpZDEOMAwGA1UEChMFTUlUeUMxGzAZBgNVBAsTEk1JVHlDIEROSWUgUHJ1 +ZWJhczEeMBwGA1UEAxMVVXN1YXJpbyBkZSBwcnVlYmFzIDYxMIGfMA0GCSqGSIb3 +DQEBAQUAA4GNADCBiQKBgQC2ehoLcO6lXWmKzJfdz2m+vRZmGeDo5OF+Q8MNdVtL +8AKbMykP6G9JOzBT3WLhzQKszMg43DQjViN6mTQsPLYCfe/n6LmTWZkRvsIzrffM +aL7goy47VCX1CeUQ80cuUAHJpRq7UMObNvgV/8rn+zPfYmErqZVhAckleP4/RgeC +9QIDAQABo4IBWDCCAVQwCQYDVR0TBAIwADALBgNVHQ8EBAMCBeAwHQYDVR0OBBYE +FNhww+tqmd7gvdedMv0Gk1mEolYdMIGYBgNVHSMEgZAwgY2AFPWhaqh3T1uxBIyn +fkjxDp/Fdo8boXKkcDBuMQ8wDQYDVQQIEwZNYWRyaWQxDzANBgNVBAcTBk1hZHJp +ZDEOMAwGA1UEChMFTUlUeUMxGzAZBgNVBAsTEk1JVHlDIEROSWUgUHJ1ZWJhczEQ +MA4GA1UEAxMHUm9vdCBDQTELMAkGA1UEBhMCRVOCAQMwCQYDVR0RBAIwADA2BgNV +HRIELzAthitodHRwOi8vbWluaXN0ZXItOGpneHk5Lm1pdHljLmFnZS9QS0kvQ0Eu +Y3J0MD0GA1UdHwQ2MDQwMqAwoC6GLGh0dHA6Ly9taW5pc3Rlci04amd4eTkubWl0 +eWMuYWdlL1BLSS9jcmwuY3JsMA0GCSqGSIb3DQEBBQUAA4GBADn3vfsgfhyreHhn +4VCG4WuG+g5qTRKpu72ZScMbyY+e3d2m6fOSMkoEC+NwaXgl4Y/vKlXGKgdhYjLD +WjobRDVQOQRQ4Q/Wv/aPPno1CjBkYdY5rnCM/oiy8QXNjdboXRiE40kCyqj7jiop +B20uO2a0yacC5ooWQz1pqZEhSWRDMYICHTCCAhkCAQEweDByMQswCQYDVQQGEwJF +UzEPMA0GA1UECBMGTWFkcmlkMQ8wDQYDVQQHEwZNYWRyaWQxDjAMBgNVBAoTBU1J 
+VHlDMRswGQYDVQQLExJNSVR5QyBETkllIFBydWViYXMxFDASBgNVBAMTC0NBIHVz +dWFyaW9zAgIAoTALBglghkgBZQMEAgGggfowGAYJKoZIhvcNAQkDMQsGCSqGSIb3 +DQEHATAcBgkqhkiG9w0BCQUxDxcNMTkwMTI0MTM0MjI1WjAvBgkqhkiG9w0BCQQx +IgQgmC2ePrmW9VnmM/TRlN7zdh2Qn1o7ZH0ahR/q1nwyydEwgY4GCSqGSIb3DQEJ +DzGBgDB+MAsGCWCGSAFlAwQBKjAIBgYqhQMCAgkwCAYGKoUDAgIVMAsGCWCGSAFl +AwQBFjALBglghkgBZQMEAQIwCgYIKoZIhvcNAwcwDgYIKoZIhvcNAwICAgCAMA0G +CCqGSIb3DQMCAgFAMAcGBSsOAwIHMA0GCCqGSIb3DQMCAgEoMA0GCSqGSIb3DQEB +AQUABIGAFR6+Q41Ak8HXUNnbkEQDWN3JBacSYi5mAPtHyJNHGLewKTwxOrf/nGhk +Qq5zqvMLhci0NBU3wvW+lAKX3ytavfkRkRFlTruuH151Stkr2TRW6h132ggA2AAJ +3gGxGqN2bYAYB6O8QY4NhmJdSi+j4bK0ayo15HhpagRfp2181lU= +-----END PKCS7----- diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_ind.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_ind.p7m new file mode 100644 index 00000000000..cf540839511 Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_ind.p7m differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testRSAKEYandCERT.p12 b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testRSAKEYandCERT.p12 new file mode 100644 index 00000000000..1c536e8fbea Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testRSAKEYandCERT.p12 differ diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java index 04e431361ba..5bf9824140a 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java @@ -672,6 +672,31 @@ public void testPOIFSContainerDetector() throws Exception { } } + @Test + public void testPKCSAndFriends() throws Exception { + assertTypeByNameAndData("Test4.pdf.tsd", "application/timestamped-data"); + + + assertTypeByNameAndData("testPKCS7_certs_only_def.p7c", "application/pkcs7-mime; smime-type=certs-only"); + assertTypeByNameAndData("testPKCS7_certs_only_ind.p7c", "application/pkcs7-mime; smime-type=certs-only"); + assertTypeByNameAndData("testPKCS7_compressed_def_long.p7z", "application/pkcs7-mime; smime-type=compressed-data"); + assertTypeByNameAndData("testPKCS7_compressed_def_short.p7z", "application/pkcs7-mime; smime-type=compressed-data"); + assertTypeByNameAndData("testPKCS7_compressed_ind.p7z", "application/pkcs7-mime; smime-type=compressed-data"); + assertTypeByNameAndData("testPKCS7_signature_def.p7s", "application/pkcs7-signature"); + assertTypeByNameAndData("testPKCS7_signature_ind.p7s", "application/pkcs7-signature"); + + assertTypeByNameAndData("testPKCS7_signed_data_def.p7m", "application/pkcs7-mime; smime-type=signed-data"); + assertTypeByNameAndData("testPKCS7_signed_data_ind.p7m", "application/pkcs7-mime; smime-type=signed-data"); + assertTypeByNameAndData("testPKCS7_enveloped_def.p7m", "application/pkcs7-mime; smime-type=enveloped-data"); + assertTypeByNameAndData("testPKCS7_enveloped_ind.p7m", "application/pkcs7-mime; smime-type=enveloped-data"); + + assertTypeByNameAndData("test.xml.p7m", "application/pkcs7-mime; smime-type=signed-data"); + 
assertTypeByNameAndData("Test4.pdf.tsd", "application/timestamped-data"); + assertTypeByNameAndData("testDetached.p7s", "application/pkcs7-signature"); + + assertTypeByNameAndData("testRSAKEYandCERT.p12", "application/x-pkcs12"); + } + private long countBytes(InputStream is) throws IOException { int b = is.read(); long len = 0; diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java index 28b9b0dd462..8b8875a7b63 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java @@ -32,13 +32,20 @@ public void testBasic() throws Exception { //integration test Detector detector = TikaConfig.getDefaultConfig().getDetector(); List detectors = ((CompositeDetector) detector).getDetectors(); - assertEquals(8, detectors.size()); - assertEquals("org.gagravarr.tika.OggDetector", detectors.get(0).getClass().getName()); - assertEquals("org.apache.tika.detect.gzip.GZipSpecializationDetector", - detectors.get(3).getClass().getName()); - - assertEquals("org.apache.tika.detect.microsoft.POIFSContainerDetector", - detectors.get(4).getClass().getName()); - assertEquals("org.apache.tika.mime.MimeTypes", detectors.get(7).getClass().getName()); + String[] expected = new String[]{ + "org.gagravarr.tika.OggDetector", + "org.apache.tika.detect.MatroskaDetector", + "org.apache.tika.detect.apple.BPListDetector", + "org.apache.tika.detect.crypto.ASN1Detector", + "org.apache.tika.detect.gzip.GZipSpecializationDetector", + "org.apache.tika.detect.microsoft.POIFSContainerDetector", + "org.apache.tika.detect.ole.MiscOLEDetector", + 
"org.apache.tika.detect.zip.DefaultZipContainerDetector", + "org.apache.tika.mime.MimeTypes" + }; + assertEquals(9, detectors.size()); + for (int i = 0; i < detectors.size(); i++) { + assertEquals(expected[i], detectors.get(i).getClass().getName()); + } } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java index f72f7abb7fa..ca807d10aac 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java @@ -1214,14 +1214,14 @@ public void testMIF() throws Exception { @Test public void testPKCSSignatures() throws Exception { // PKCS7 Signed XML files - assertType("application/pkcs7-signature", "testPKCS17Sig.xml.p7m"); - assertType("application/pkcs7-signature", "testPKCS17Sig-v2.xml.p7m"); - assertType("application/pkcs7-signature", "testPKCS17Sig-v3.xml.p7m"); - assertType("application/pkcs7-signature", "testPKCS17Sig-v4.xml.p7m"); - assertTypeByData("application/pkcs7-signature", "testPKCS17Sig.xml.p7m"); - assertTypeByData("application/pkcs7-signature", "testPKCS17Sig-v2.xml.p7m"); - assertTypeByData("application/pkcs7-signature", "testPKCS17Sig-v3.xml.p7m"); - assertTypeByData("application/pkcs7-signature", "testPKCS17Sig-v4.xml.p7m"); + assertType("application/pkcs7-mime", "testPKCS17Sig.xml.p7m"); + assertType("application/pkcs7-mime", "testPKCS17Sig-v2.xml.p7m"); + assertType("application/pkcs7-mime", "testPKCS17Sig-v3.xml.p7m"); + assertType("application/pkcs7-mime", "testPKCS17Sig-v4.xml.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS17Sig.xml.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS17Sig-v2.xml.p7m"); + 
assertTypeByData("application/pkcs7-mime", "testPKCS17Sig-v3.xml.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS17Sig-v4.xml.p7m"); } @Test @@ -1386,6 +1386,37 @@ public void testAACDetection() throws Exception { assertTypeByName("audio/x-aac", "x.aac"); } + @Test + public void testPKCS7() throws Exception { + //from rob975 on https://github.com/apache/tika/pull/267 + // application/pkcs7-* media types + // this tests only the magic detector, which is not sufficient. See the ASN1Detector + assertType("application/pkcs7-mime; smime-type=certs-only", "testPKCS7_certs_only_def.p7c"); + assertType("application/pkcs7-mime; smime-type=certs-only", "testPKCS7_certs_only_ind.p7c"); + assertType("application/pkcs7-mime; smime-type=compressed-data", "testPKCS7_compressed_def_long.p7z"); + assertType("application/pkcs7-mime; smime-type=compressed-data", "testPKCS7_compressed_def_short.p7z"); + assertType("application/pkcs7-mime; smime-type=compressed-data", "testPKCS7_compressed_ind.p7z"); + assertType("application/pkcs7-signature", "testPKCS7_signature_def.p7s"); + assertType("application/pkcs7-signature", "testPKCS7_signature_ind.p7s"); + + /* can't distinguish these with file extension and magic + assertType("application/pkcs7-mime; smime-type=signed-data", "testPKCS7_signed_data_def.p7m"); + assertType("application/pkcs7-mime; smime-type=signed-data", "testPKCS7_signed_data_def.pem"); + assertType("application/pkcs7-mime; smime-type=signed-data", "testPKCS7_signed_data_ind.p7m"); + assertType("application/pkcs7-mime; smime-type=enveloped-data", "testPKCS7_enveloped_def.p7m"); + assertType("application/pkcs7-mime; smime-type=enveloped-data", "testPKCS7_enveloped_ind.p7m"); + */ + assertTypeByData("application/pkcs7-mime", "testPKCS7_certs_only_def.p7c"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_certs_only_ind.p7c"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_enveloped_def.p7m"); + assertTypeByData("application/pkcs7-mime", 
"testPKCS7_enveloped_ind.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signature_def.p7s"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signature_ind.p7s"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signed_data_def.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signed_data_def.pem"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signed_data_ind.p7m"); + } + private void assertText(byte[] prefix) throws IOException { assertMagic("text/plain", prefix); } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java index 18c13145991..d46105ec4f4 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java @@ -40,6 +40,7 @@ public void testBrokenPdf() throws Exception { //make sure that embedded file appears in list //and make sure embedded exception is recorded List list = getRecursiveMetadata("testTSD_broken_pdf.tsd", parseContext); + debug(list); assertEquals(2, list.size()); assertEquals("application/pdf", list.get(1).get(Metadata.CONTENT_TYPE)); assertNotNull(list.get(1).get(TikaCoreProperties.EMBEDDED_EXCEPTION));