From e98079ef9fbe298b59f38bb641ec70662bda9b0f Mon Sep 17 00:00:00 2001 From: tallison Date: Mon, 14 Jul 2025 15:07:47 -0400 Subject: [PATCH 1/3] TIKA-1997 -- initial poc, derived from rob975's work on: https://github.com/apache/tika/pull/267 --- .../org/apache/tika/mime/tika-mimetypes.xml | 29 +- .../org/apache/tika/TikaDetectionTest.java | 2 +- .../tika/detect/crypto/ASN1Detector.java | 313 ++++++++++++++++++ .../tika/detect/crypto/dev/ASN1Dumper.java | 117 +++++++ .../tika/parser/crypto/Pkcs7Parser.java | 1 + .../apache/tika/parser/crypto/TSDParser.java | 3 +- .../services/org.apache.tika.detect.Detector | 16 + .../tika/parser/crypto/Pkcs7ParserTest.java | 3 + .../resources/test-documents/test.xml.p7m | Bin 0 -> 2504 bytes .../testPKCS7_certs_only_def.p7c | Bin 0 -> 2972 bytes .../testPKCS7_certs_only_ind.p7c | Bin 0 -> 2976 bytes .../testPKCS7_compressed_def_long.p7z | Bin 0 -> 331 bytes .../testPKCS7_compressed_def_short.p7z | Bin 0 -> 125 bytes .../testPKCS7_compressed_ind.p7z | Bin 0 -> 141 bytes .../testPKCS7_enveloped_def.p7m | Bin 0 -> 350 bytes .../testPKCS7_enveloped_ind.p7m | Bin 0 -> 356 bytes .../testPKCS7_signature_def.p7s | Bin 0 -> 1566 bytes .../testPKCS7_signature_ind.p7s | Bin 0 -> 1568 bytes .../testPKCS7_signed_data_def.p7m | Bin 0 -> 1574 bytes .../testPKCS7_signed_data_def.pem | 35 ++ .../testPKCS7_signed_data_ind.p7m | Bin 0 -> 1582 bytes .../test-documents/testRSAKEYandCERT.p12 | Bin 0 -> 1717 bytes .../detect/TestContainerAwareDetector.java | 25 ++ .../tika/detect/TestDetectorLoading.java | 10 +- .../org/apache/tika/mime/TestMimeTypes.java | 47 ++- .../tika/parser/crypto/TSDParserTest.java | 1 + 26 files changed, 583 insertions(+), 19 deletions(-) create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/ASN1Detector.java create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/dev/ASN1Dumper.java create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/test.xml.p7m create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_def.p7c create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_ind.p7c create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_long.p7z create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_short.p7z create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_ind.p7z create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_def.p7m create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_ind.p7m create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signature_def.p7s create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signature_ind.p7s create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.p7m create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.pem create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_ind.p7m create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testRSAKEYandCERT.p12 diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml index 0978ef5755b..870855c4bfc 100644 --- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml +++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml @@ -875,13 +875,24 @@ - - + + + + + + + - - + + + + + + + + @@ -911,6 +922,15 @@ + + + + + + @@ -4847,6 +4867,7 @@ + diff --git a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java index f52482c8d78..0752731686e 100644 --- a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java +++ b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java @@ -105,7 +105,7 @@ public void testHttpServerFileExtensions() { assertEquals("application/pics-rules", tika.detect("x.prf")); assertEquals("application/pkcs10", tika.detect("x.p10")); assertEquals("application/pkcs7-mime", tika.detect("x.p7m")); - assertEquals("application/pkcs7-mime", tika.detect("x.p7c")); + assertEquals("application/pkcs7-mime; smime-type=certs-only", tika.detect("x.p7c")); assertEquals("application/pkcs7-signature", tika.detect("x.p7s")); assertEquals("application/pkix-cert", tika.detect("x.cer")); assertEquals("application/pkix-crl", tika.detect("x.crl")); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/ASN1Detector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/ASN1Detector.java new file mode 100644 index 00000000000..6c26022af43 --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/ASN1Detector.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.detect.crypto; + +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; + +import org.bouncycastle.asn1.ASN1Encodable; +import org.bouncycastle.asn1.ASN1InputStream; +import org.bouncycastle.asn1.ASN1Integer; +import org.bouncycastle.asn1.ASN1ObjectIdentifier; +import org.bouncycastle.asn1.ASN1OctetString; +import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.ASN1Sequence; +import org.bouncycastle.asn1.ASN1Set; +import org.bouncycastle.asn1.ASN1TaggedObject; +import org.bouncycastle.asn1.DLTaggedObject; + +import org.apache.tika.config.Field; +import org.apache.tika.detect.Detector; +import org.apache.tika.io.BoundedInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; + +/** + * This is a very limited asn1 detector that focuses on pkcs and timestamped-data (so far) + */ +public class ASN1Detector implements Detector { + + private static final String DATA_OID = "1.2.840.113549.1.7.1"; + + private static final Map ENVELOPED = Map.of("smime-type", "enveloped-data"); + private static final Map SIGNED = Map.of("smime-type", "signed-data"); + private static final Map CERTS_ONLY = Map.of("smime-type", "certs-only"); + private static final Map COMPRESSED = Map.of("smime-type", "compressed-data"); + + + private static final long serialVersionUID = -8414458255467101503L; + private static final MediaType PKCS12_MEDIA_TYPE = MediaType.application("x-pkcs12"); + private static final MediaType PKCS7_ENVELOPED = new MediaType("application", "pkcs7-mime", ENVELOPED); + private static final MediaType PKCS7_SIGNED = new MediaType("application", "pkcs7-mime", SIGNED); + private static final MediaType PKCS7_CERTS_ONLY = new MediaType("application", "pkcs7-mime", CERTS_ONLY); + private static final MediaType PKCS7_COMPRESSED = new MediaType("application", "pkcs7-mime", COMPRESSED); + private static final MediaType PKCS7_SIGNATURE_ONLY = MediaType.application("pkcs7-signature"); + + //not pkcs7 at all, but shares magic with compressed pkcs7 + private static final MediaType TIME_STAMPED_DATA = MediaType.application("timestamped-data"); + + private int markLimit = 1000000; + + @Override + public MediaType detect(InputStream input, Metadata metadata) throws IOException { + if (input == null) { + return null; + } + try { + input.mark(2); + int b = input.read(); + if (b != 0x30) { + return null; + } + b = input.read(); + if (b < 0x7A || b > 0x84) { + return null; + } + } finally { + input.reset(); + } + PKCSFeatures pkcsFeatures = new PKCSFeatures(); + BoundedInputStream bis = new BoundedInputStream(markLimit, input); + bis.mark(markLimit); + try { + ASN1InputStream asn1InputStream = new ASN1InputStream(bis); + ASN1Primitive root = null; + if ((root = asn1InputStream.readObject()) != null) { + handleRootNode(root, pkcsFeatures); + if (pkcsFeatures.primaryType == PKCSFeatures.PRIMARY_TYPE.TIME_STAMPED_DATA) { + return TIME_STAMPED_DATA; + } else if (pkcsFeatures.looksLikePKCS12) { + return PKCS12_MEDIA_TYPE; + } else if (pkcsFeatures.primaryType == PKCSFeatures.PRIMARY_TYPE.ENVELOPED_DATA) { + return PKCS7_ENVELOPED; + } else if (pkcsFeatures.primaryType == PKCSFeatures.PRIMARY_TYPE.COMPRESSED) { + return PKCS7_COMPRESSED; + } else if (pkcsFeatures.primaryType == PKCSFeatures.PRIMARY_TYPE.SIGNED_DATA) { + if (pkcsFeatures.hasData) { + return PKCS7_SIGNED; + } else if (pkcsFeatures.hasCerts) { + return PKCS7_CERTS_ONLY; + } else { + return PKCS7_SIGNATURE_ONLY; + } + } + } + } catch (IOException e) { + e.printStackTrace(); + //swallow + } finally { + bis.reset(); + } + return null; + } + + private void handleRootNode(ASN1Primitive root, PKCSFeatures pkcsFeatures) throws IOException { + String oid = null; + ASN1TaggedObject taggedObject = null; + if (!(root instanceof ASN1Sequence)) { + return; + } + ASN1Sequence seq = (ASN1Sequence) root; + //try for pkcs12 + if (seq.size() == 3) { + tryPKCS12(seq, pkcsFeatures); + if (pkcsFeatures.looksLikePKCS12) { + return; + } + } + for (ASN1Encodable c : ((ASN1Sequence) root)) { + if (c instanceof ASN1ObjectIdentifier) { + oid = ((ASN1ObjectIdentifier) c).toString(); + } else if (c instanceof ASN1TaggedObject) { + taggedObject = (ASN1TaggedObject) c; + } + } + PKCSFeatures.PRIMARY_TYPE type = PKCSFeatures.lookup(oid); + pkcsFeatures.primaryType = type; + if (type == PKCSFeatures.PRIMARY_TYPE.UNKNOWN) { + return; + } else if (type == PKCSFeatures.PRIMARY_TYPE.TIME_STAMPED_DATA) { + return; + } + if (taggedObject != null) { + handleNode(taggedObject, pkcsFeatures); + } + } + + private void tryPKCS12(ASN1Sequence seq, ASN1Detector.PKCSFeatures pkcsFeatures) { + //This could much more rigorous -- see TIKA-3784 + + //require version 3 as the first value + ASN1Encodable obj0 = seq.getObjectAt(0); + if (! (obj0 instanceof ASN1Integer)) { + return; + } + if (((ASN1Integer)obj0).getValue().intValue() != 3) { + return; + } + //require two sequences + if (! (seq.getObjectAt(1) instanceof ASN1Sequence) || + ! (seq.getObjectAt(2) instanceof ASN1Sequence)) { + return; + } + //first sequence must have a data type oid as its first element + ASN1Sequence seq1 = (ASN1Sequence) seq.getObjectAt(1); + if (seq1.size() < 2) { + return; + } + if (! (seq1.getObjectAt(0) instanceof ASN1ObjectIdentifier)) { + return; + } + if (! DATA_OID.equals(((ASN1ObjectIdentifier)seq1.getObjectAt(0)).getId())) { + return; + } + //and a tagged object as its second + //if you parse the tagged object and iterate through its children + //you should eventually find oids starting with "1.2.840.113549.1.12.*" + if (! (seq1.getObjectAt(1) instanceof DLTaggedObject)) { + return; + } + pkcsFeatures.looksLikePKCS12 = true; + } + + private void handleSequence(ASN1Sequence seq, PKCSFeatures pkcsFeatures) throws IOException { + if (seq.size() == 0) { + return; + } + if (isCert(seq)) { + pkcsFeatures.hasCerts = true; + return; + } + if (hasSignedData(seq)) { + pkcsFeatures.hasData = true; + return; + } + + + } + + private boolean isCert(ASN1Sequence seq) { + if (seq.size() != 6) { + return false; + } + //do more + //e.g. check for sequence in seq.get(2) and make sure there's a data oid there + return true; + } + + private boolean hasSignedData(ASN1Sequence seq) { + if (seq.size() != 5) { + return false; + } + //data should be a sequence in position 2 + ASN1Encodable dataSequence = seq.getObjectAt(2); + if (! (dataSequence instanceof ASN1Sequence)) { + return false; + } + if (((ASN1Sequence) dataSequence).size() < 1) { + return false; + } + ASN1Encodable obj0 = ((ASN1Sequence) dataSequence).getObjectAt(0); + if (obj0 instanceof ASN1ObjectIdentifier) { + if (DATA_OID.equals(((ASN1ObjectIdentifier) obj0).getId())) { + //TODO -- check for null or actual data? + if (((ASN1Sequence) dataSequence).size() > 1) { + return true; + } + } + } + return false; + } + + private void handleNode(ASN1Primitive primitive, PKCSFeatures pkcsFeatures) throws IOException { + if (primitive instanceof ASN1Sequence) { + handleSequence((ASN1Sequence) primitive, pkcsFeatures); + } else if (primitive instanceof ASN1TaggedObject) { + handleTagged((ASN1TaggedObject) primitive, pkcsFeatures); + } else if (primitive instanceof ASN1OctetString) { + ASN1OctetString octetString = (ASN1OctetString) primitive; + try { + ASN1Primitive newP = ASN1Primitive.fromByteArray(octetString.getOctets()); + handleNode(newP, pkcsFeatures); + } catch (IOException e) { + //swallow + + } + } else if (primitive instanceof ASN1ObjectIdentifier) { + ASN1ObjectIdentifier oid = (ASN1ObjectIdentifier) primitive; + + } else if (primitive instanceof ASN1Set) { + for (ASN1Encodable obj : ((ASN1Set)primitive)) { + handleNode(obj.toASN1Primitive(), pkcsFeatures); + } + } + } + + private void handleTagged(ASN1TaggedObject tagged, PKCSFeatures pkcsFeatures) throws IOException { + handleNode(tagged.getBaseObject().toASN1Primitive(), pkcsFeatures); + } + + @Field + public void setMarkLimit(int markLimit) { + this.markLimit = markLimit; + } + + private static class PKCSFeatures { + enum PRIMARY_TYPE { + SIGNED_DATA("1.2.840.113549.1.7.2"), ENVELOPED_DATA("1.2.840.113549.1.7.3"), + SIGNED_AND_ENVELOPED_DATA("1.2.840.113549.1.7.4"), + DIGESTED_DATA("1.2.840.113549.1.7.5"), + ENCRYPTED_DATA("1.2.840.113549.1.7.6"), COMPRESSED("1.2.840.113549.1.9.16.1.9"), + TIME_STAMPED_DATA("1.2.840.113549.1.9.16.1.31"), UNKNOWN("UNKNOWN"); + private final String oid; + + PRIMARY_TYPE(String oid) { + this.oid = oid; + } + } + + private static Map TYPE_LOOKUP = new HashMap<>(); + static { + for (PRIMARY_TYPE t : PRIMARY_TYPE.values()) { + if (t == PRIMARY_TYPE.UNKNOWN) { + continue; + } + TYPE_LOOKUP.put(t.oid, t); + } + } + private PRIMARY_TYPE primaryType = PRIMARY_TYPE.UNKNOWN; + private boolean hasData; + private boolean hasCerts; + private boolean hasSignature; + private boolean looksLikePKCS12; + + static PRIMARY_TYPE lookup(String oid) { + if (TYPE_LOOKUP.containsKey(oid)) { + return TYPE_LOOKUP.get(oid); + } + return PRIMARY_TYPE.UNKNOWN; + } + + @Override + public String toString() { + return "PKCSFeatures{" + "primaryType=" + primaryType + ", hasData=" + hasData + ", hasCerts=" + hasCerts + ", hasSignature=" + hasSignature + ", hasPKCS12Oid=" + + looksLikePKCS12 + '}'; + } + } +} diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/dev/ASN1Dumper.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/dev/ASN1Dumper.java new file mode 100644 index 00000000000..08b73d44dd5 --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/detect/crypto/dev/ASN1Dumper.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.detect.crypto.dev; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.bouncycastle.asn1.ASN1Boolean; +import org.bouncycastle.asn1.ASN1Encodable; +import org.bouncycastle.asn1.ASN1InputStream; +import org.bouncycastle.asn1.ASN1Integer; +import org.bouncycastle.asn1.ASN1Null; +import org.bouncycastle.asn1.ASN1ObjectIdentifier; +import org.bouncycastle.asn1.ASN1OctetString; +import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.ASN1Sequence; +import org.bouncycastle.asn1.ASN1Set; +import org.bouncycastle.asn1.ASN1TaggedObject; +import org.bouncycastle.asn1.DERIA5String; +import org.bouncycastle.asn1.DERPrintableString; + +public class ASN1Dumper { + + public static void main(String[] args) throws Exception { + Path p = Paths.get(args[0]); + try (InputStream is = Files.newInputStream(p)) { + ASN1InputStream asn1InputStream = new ASN1InputStream(is); + ASN1Primitive root = asn1InputStream.readObject(); + handleNode(root, 0); + } + } + + private static void handleNode(ASN1Primitive primitive, int depth) throws IOException { + if (primitive instanceof ASN1Sequence) { + handleSequence((ASN1Sequence) primitive, depth); + } else if (primitive instanceof ASN1TaggedObject) { + handleTagged((ASN1TaggedObject) primitive, depth); + } else if (primitive instanceof ASN1Integer) { + System.out.println(d(depth) + "Integer: " + ((ASN1Integer)primitive).getValue().intValue()); + } else if (primitive instanceof ASN1OctetString) { + ASN1OctetString octetString = (ASN1OctetString) primitive; + try { + ASN1Primitive newP = ASN1Primitive.fromByteArray(octetString.getOctets()); + handleNode(newP, depth); + } catch (IOException e) { + System.out.println(d(depth) + "FAILED: " + octetString.toString().substring(0, 10)); + + } + } else if (primitive instanceof ASN1ObjectIdentifier) { + ASN1ObjectIdentifier oid = (ASN1ObjectIdentifier) primitive; + System.out.println(d(depth) + "OID: " + oid.toString()); + + } else if (primitive instanceof ASN1Set) { + for (ASN1Encodable obj : ((ASN1Set)primitive)) { + handleNode(obj.toASN1Primitive(), depth + 1); + } + } else if (primitive instanceof ASN1Null) { + System.out.println(d(depth) + "NULL"); + } else if (primitive instanceof DERIA5String) { + System.out.println(d(depth) + ((DERIA5String)primitive).getString()); + } else if (primitive instanceof DERPrintableString) { + System.out.println(d(depth) + ((DERPrintableString)primitive).getString()); + } else if (primitive instanceof ASN1Boolean) { + System.out.println(d(depth) + ((ASN1Boolean)primitive).toString()); + } else { + System.out.println(d(depth) + "Not handling " + primitive.getClass()); + } + } + + private static void handleSequence(ASN1Sequence seq, int depth) throws IOException { + System.out.println(d(depth) + "seq size: " + seq.size()); + int i = 0; + for (ASN1Encodable p : seq) { + String s = p.toString(); + if (s.length() > 20) { + s = s.substring(0, 20) + "..."; + } +// System.out.println(d(depth) + "SEQUENCE " + i++ + " : " + s + " : " + p.getClass()); + } + // System.out.println(d(depth) + "handling children"); + for (ASN1Encodable p : seq) { + handleNode(p.toASN1Primitive(), depth + 1); + } + + } + + private static void handleTagged(ASN1TaggedObject tagged, int depth) throws IOException { + System.out.println(d(depth) + "handling tagged " + tagged.getBaseObject().getClass()); + handleNode(tagged.getBaseObject().toASN1Primitive(), depth + 1); + } + + + private static String d(int depth) { + StringBuilder sb = new StringBuilder(); + for (int i = 0 ; i < depth; i++) { + sb.append(" "); + } + return sb.toString(); + } +} diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java index 0c5ade3681f..973104444d2 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java @@ -63,6 +63,7 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, try { DigestCalculatorProvider digestCalculatorProvider = new JcaDigestCalculatorProviderBuilder().setProvider("BC").build(); + CMSSignedDataParser parser = new CMSSignedDataParser(digestCalculatorProvider, CloseShieldInputStream.wrap(stream)); try { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java index 2a0e4a0f957..357d37947e0 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java @@ -24,7 +24,6 @@ import java.security.NoSuchProviderException; import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -81,7 +80,7 @@ public class TSDParser implements Parser { private static final String TSD_TSA = "TSA"; private static final String TSD_ALGORITHM = "Algorithm"; private static final Set SUPPORTED_TYPES = - Collections.singleton(MediaType.application("timestamped-data")); + Set.of(MediaType.application("timestamped-data"), MediaType.application("x-tika-compressed-pkc7-base")); @Override public Set getSupportedTypes(ParseContext context) { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector new file mode 100644 index 00000000000..5cb880c5777 --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.tika.detect.crypto.ASN1Detector \ No newline at end of file diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java index 42761d3de2c..ecdaef4f92f 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java @@ -22,6 +22,7 @@ import java.io.InputStream; +import org.junit.jupiter.api.Test; import org.xml.sax.ContentHandler; import org.apache.tika.TikaTest; @@ -31,6 +32,8 @@ import org.apache.tika.sax.BodyContentHandler; public class Pkcs7ParserTest extends TikaTest { + + @Test public void testDetachedSignature() throws Exception { try (InputStream input = getResourceAsStream("/test-documents/testDetached.p7s")) { ContentHandler handler = new BodyContentHandler(); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/test.xml.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/test.xml.p7m new file mode 100644 index 0000000000000000000000000000000000000000..ce21cc961f1575ae4855974e4f0b806351662956 GIT binary patch literal 2504 zcmd5;Yg7|g9%m*)gaTrfqAmreQMB?%?gWy68v=QOvE*SAw8fs)F}cK{LxxUu%v-BPAuw`sk^vUDsE&9<6&PKnTY#`*G)- zWd4uez4N>O*9>TvC+2i7?!KLZO~D7+K|79NR4Pc3C$8#Vj6oTJWASky280c?&ub?- zuw(kt<~o*agnWqMf+Y${2^1s@`Zzxm43sFUyyf{t3Nj?nK|jrML0F<_fgweyLGOmo zH^7i!3;JQRLGOWVjRl5$JX0^AeJ1UPc0>TKf%n1jyP5ODhcI5N$Gg(#a_Lp0UIR@=Fl$&0%&s9mW_~#2%wdJST_1pf6`wSiy4*^+@D+h(c`Uf&vT*Hou`fAJV>Jjk>rd zzsSG3AiuCcTa>@rSEI}KL#?)k*7;Za{FGkx&_ZOI*j6OkzGl0c$dRz=8X;5IMy7?>d`k0dAC9OX{4)$JucE+dr&rc37d z8A(7_T&Ph~6aZQ(1I(7JNy*c#=rp7m@&Z%C_-Fy0k;(-NC8sP|VGGu9l8^L2zL9BV zNIx@yZAIyTAZum{r3OVn3yKR1)KoQ6H4zm^S2gIs(kJsQ2D(w6Ai~pVx4NyQM`>2N zltv|)Pg-nlYb2qh+gf2m>2;DeyUT6$SgCSg8D(AKsj!ePx6|?v23d_>n|ZO7^ja&e z_EA~obvkSg8)*y*T#(@?#CG(6D!0w$1(eF}wNj2+ANnoTI9ivsIBK8MIK1{|ht~=m zt$`+o#aLjk4K&9_Q(Zt66sb`(byOixM{|>irDxZpT3snX7S_=|ZY9qsSs0*MKj$kO z7nF?L~T<2bM?X=}zdlhNouZMQduf?a0 z%)3uyUF*Dh_4`f1^UJom)1GE~Up-+uuyyX;1aq6WS9hZ){B7K=hN69MY}al*`Kioo zIQ{g@9dzl7S#8{&O>eGm`#J9II|&;fn1}Z_fYx<$E)M_ptCQFNG`MA?ZTIG!`8^wO z85W2Ap#$eZ2Ob0vMJtmeegmEb)pB`mG4$N zksiCvZY9ywL3I;g1ezs7Odv=L4LmA2mO))KQB48_Kzhuh#6qw@{2xOYWq@IX36ufD zI3s{{tJerZ{UVi$^M&e_3@T(ab_5i6mDB8TN%ai*l{T|AsgITCCxF$<)(ry18=cyQEz zKF$21=IE84RmFjl(jg#Tkq~nT@d6OP1LCd$ad1G~(JlU-_0!T3q}!w;_=4F5{_%Kc ztHD@%BFWF7zi2y0T2wN)D*=_qHK0{ezsEwPj?d$y?Z&1Ca-zAB=aWp{QYmPolL5M6 zBZnpg>iSA1B!Hp;?Lc8P%1up8N*~SFO`Qo|~NNef}kL=C3 zx9E!gpWC+W2`|jXK3oyo6ll)LIxsBGb^Q4~-~4pBr1ACzP2YiiwOQ5p6CSN;`JVbO z-uRkrU)0qdWwpzN(?ldpAZBZys)y?;PB^5nECgK6v%1B?|%{+Lha1O}{pk?Y;TVH(REk+k4%8 zSN6{R`S`KM90T)t%E5Op96IsQGyl1(T(IeH|2UhW>pOZ}S#smwLuvd84^wgGgUu-| zA7xog7YELMm%8!Go(dgw?dEgth+QxImEHbx_a|Jbw(wI^!mYt=yMF2aBOAW_)`m|M zusqi?H~SKAk5_!$dC&A-@d4fYe?I&6tn*(znO+&%{9arf)q(9qvvddgBmm;Z|C8Y# zeJ{m)pA0Zy*2FNBQAtr{3I-mJDv9w_Dtc9Dfd)-uG)6VLYM`3%5|n}H zbN7Eb9Im}e29C^Zp-U2n@z>V={d(`2;n+$+nFXB;Ii?Uz?QwOHM-aH1@&k>-)|eZFbka zdoEKy%kxxKb?36R73Wfa*QSbJYSQL*LhrsMZOo&Wg2O9r@4of*Y5%dAGE?HW8sYWD RI}5`tc_%%6y`yjM{{sEWJ$?WH literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_def.p7c b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_def.p7c new file mode 100644 index 0000000000000000000000000000000000000000..5f25350c4861dd4768555c68d256b9fe029486a1 GIT binary patch literal 2972 zcmc&$c~nzp7SDTm378NB0$~%0sDL2hOIX|)AnYwJtb&M;5Ecmu1|nLeK!l*;g5VMm zF#dXYvM@Rn#7&Qg3bZl%>QoXtk{=f>6ba%YnG=_4RFW@Gz!%cj zMvCH@`athwh%PWiLo%6<60~&Pb+O~Q7hyH=%8p)M6sJU|E=05f zm|y~$2o2F~97|TD-e|FSgYV{)xZls5qc=|-WVB*@Z6u=sWL%NVNC2a^ZDT}pxKRRz zfR`*GqX}}7INPx7l+3TDFsCYG!kF^*Dg4=1j09d%5>F7r6DbW4qdCf7rB#&+b7Nr{h|qv8QQevd5eRB{^iP;L!9a|On<4_h z2ljH797q|l$;pY!ZEd+CzTLDIv`vbQWZA?<+9WA^%qGT>fy(@+mi-^m`Vw&GBnWX0~xziN)Uvk05Mpg zq5&i|m`F&-O-h^=8t|Qzf~791w9&>DB_~Z2v{j89HgVX%P0rirabvrnl}H8}SWZ-| z@-V;w3G_9Qgq9hBdbYy|j7T5>`rQo0;4LU;ltx!Z|Y`yelwI2flI+O*Q#HPhs$@({_NLvRO#0Xc%W7f9T>e0>D5}SqZIDne><4D zx8)L{rTM#aUFS!S<^RRF8Ws6?`si80YuVEPF)W>zBrzrh=;!}HljjsCNE+wtDL2H4 zarM0qubA%Law-4t{A1;LJ3N;LA0eIdduEh-G|KV7+&ftlj{TI5J%AfbU;pB-zUe<| z!KeL??Ywh-S=GlhMcRi7f~Q^O)sB!JaWqWd*calkyqTzxf7`$~ka|4t^pZ%JqFg7OK`w!#Od(dN-Zz8ujp{;Kr99hA7v*I{Wg#`hekse#U3~hYdN5 zfo_-Gf&1Co&h)E?(uRbx?C&FA-8Esg7*%Lj2i2HGU3#@^0mm`5vqnE`XtOZDC)Y0l zegpYd@f+H$M~V-%4p1y?b}$lyM_PJv=o_xOS=#L|V8zf1T5@I1-zpCEa2&%gzt852 zq|fAkw^_q{w(^vu&*wqCCB5CZsDNTp(%(GrX>hG9{*gv#p)jal6!v_TxyPG9N|(-F zlQrKz$rbD0soc8UL-f2A-IbLYFA3I zyk2kFQCML;pR%oG$3Xa&S|2C%(TrLe?bM0+I=)Zu=c!kS%Zk3csb4kM&4HKZzi`=| zIiy0av0+|vENMwolb_d(WuYZCk2YTWNlveuSCVfo)b*L?(mI%M{H>SmJG`PKHqYNS zydTCxIH*(td7~9^2Rf7cPaP1^H3&mEFb$0$>Hk6_d(}op_qZi6|ATw{3(T=8;s#uS zv)oC(Qo3BV%tibeXryYxXe)88Y>q zQt8wjhaYM1gzn1Bjnv^`Zj^QN)OJJPjmo8Eu=mIw{6NCPuumqjFMJN}>RD&IKi-X2 zT)|az3HX@>IprB!GB*s`ul}ZXd)}fBoAk}E56~ZNFZ&JeNqwP^{ur_)^*wSlzB^?7 zs^7Yq3&RYSW|j_|l#K1$AY-n+xvkh}EbE4M`CE&e0?{X@@_R1i2lpILHOvusdZ&f= zdhNvbj^%jEW#+EVFMIN|%c4lZ0oN5KJ#ct@#nAQiLLJesLHYN`8$cEjG?3F)j|US@ zEEvpcwjMnrK3ic!8&2JBQT4Lv1X=Axc~&=6O1E?gVOw@(^q2qZ)s7V@HO`!Lf{Qye jtWR|MxzX-;Om`$hl>H>`lMXJ}d2XU&ZhQ499256%M>Gn1 literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_ind.p7c b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_certs_only_ind.p7c new file mode 100644 index 0000000000000000000000000000000000000000..c0bc161e84cccf50ae22224e38370019eea75d91 GIT binary patch literal 2976 zcmc&$dpwl+8h_t+-Wi%P!Z2>hq(Wr;W?Z^E824RWTqB7YL265w7;U!wwL~$%@V1=^?G}B29Uk@iX!y_gtn&Fg?kR%Ql^7ss2zIZEN%-}eIxw=%=5*C|f z2NX{*MlJ(3Y&MIH)4?ESdPFp!iQ8ADkvZXlm`IUW7!%H#I8>pLz4=0ch_N-_mUqda~a=m_VgJl zQIq+MHKd0;H7u2&dO(6|17j>Cqxl#?Pt#r(J)UzBQAt|a(aVqEmT1)liB|w4TtFSA zquPyQiSm>iEoN^B-P{t_`x$d|=Ba}8R-CVirZ<4}E0P%r;Pkd_%t$UTLdX>I6D1TZ zL2d$f8=jql`PCHWRApQkSKdB_Kih&C$4^M$3#0gAg#l6|SMjT`s&t`>yHFf!VDi)I zDWOi+SDRLBbv%*TFg%W;1Vkn6bK(n8I?yJmT9RNQQH6m02@_ukNW#U9Q6UfjTNzsh zq|E5V#Q5db);zJmW?BncCq##{t)j!N5)?gV6=l!FWd2jj_K#?NiMwT>u7CrUkY
h@Tc3@Sc=Hq)y7TvBniACQK8wR*oD#arnSZ&fDj4<2YfJNCawlPBgso2!MbD z-bEr~Wk%ti?FfpX5?F}6o8f3^D^zxQR!+6vdhM)wT1iJ#Jb9&EKKjw{VO@XnP4zZv zp9TcMVSza5VQAF4?K0`D`=Z@wU_b74DePMHi}6s|?%ALHI*%&+dH^@f>Y)Rpw_%-Hvvt(M{rhhRvi7!IBDOSt zcdqOF=&`)N7*=B;i{m7XbG|OuhmxTB-iKF= z_inkAcX^4uNnO9PLP&-pwv$T=Eee_-yN%n8Rn3de511!1gz@mKG(A2pEEzQ=an zIlrvxW2!v$Lj}>@rt)e>P)|}M!r0gsWVgJTq?UJE&(NQCJoogHOGh_uUc9(Db5FOH zE$Qxz6-V|%MU};YhRsxd0@TY@IfELGrq(6T;@Qx(;ol* z98E{Y)kCR6qWrAy!(ZJsVz(GnXjTW*m_%H9wR-{AKBcopH)LqD$j>XsCk}Z7dshh> z+AT+l548?Z&8&7X;{!)pda@ZCuDY1p?9gLJ(FMweE%dT zN%v0W*5z*E=dIYTtOOE5!31idkA5lZ3wir4uF=xDZlli086^XP=@{|nQOgo6!iEAv zEfeJk%>*ph0EUl9l&C~38y1*^C|xRh=@jz+fT0rDWNJxZB7}9(Y<#xK2;NRL|Fmve zKx&IaH{Bm5N06#gOicOo3(k(mZ}pu#xmCrX8Te=EUtV)qJ@~M(e?WB;VsWuEe~99E z|9a;NahrDWa89M>-!62u)p3^hQYiQC9g#a;Y%?14F4`1WD_t91Am96-tCeVc_E>vg zS*0Yys^$FYqE>y$natUtF6%9p92~QILs2zj2g~cTi_5OA>oj}((+X#sQlk0wdh?FL z3d{M_Z7n+nLbudU9s_^;QHsrQshhvpc< zNP|0kH{aAi6&dD5ST;{>H|*Z1SXz2}k9#X<3y3mU&c=9fx zAfq6=Jbg>XhC$oa-_&l;UDRQfw)yn|#-r_JzY*MNFXYl6gSMo+M~}vK2d!WATQ_TA zh~CnS(t(qbv3(ozS*veuD>fL*yy02?)-1a~{K=vGo)hK4J^ND)bA;}msiD0dI|;pG z*`Bg|Q)kDQJ-M1?5#&I>>vE$WBs8{S=z3bAmiX6zy!+z~Ad>_dD5#n0KZ3m;dY4jupu@j@&e&lPf%|OLF+R j(dKwmcQ{j=^(5w#78K|>H(oxsy&8f}r9lu2`uzM3p4ka2 literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_long.p7z b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_long.p7z new file mode 100644 index 0000000000000000000000000000000000000000..74474876ed6a03777d9249b69ec65cb3bf2c8347 GIT binary patch literal 331 zcmXqLVsvNY)@t);`_9YADZt3Npo!7Upo!6liIKs87fFVh11KuP#tBu*&InY<%hJTi zSuv-z|E$+x2a(qA;x+BNPhL7xZ`>%5Va(=oBx>>u9d-eQ~@_0D^Hsj5Sfqybtir&6DruRoqTPj#) zzE|iOVN>B1s@+@tzE=5M-nrpp`JCG4W)6W&wmj#i2yvOGv-fKL`IlM0#bthmk4&L+ z`s}%{m+jDg?>zmb(N2?y)NZA##?HdiF6}hTH1N`_@D#Sx=Xsvl_g{J8<=oG&oW5LG XCZA*O6Kjz!;3{seTEpnuo*M@MX@rag literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_short.p7z b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_def_short.p7z new file mode 100644 index 0000000000000000000000000000000000000000..da9fbfaf1beb4bcb008db089e84e1397c90bacf4 GIT binary patch literal 125 zcmXr0X5-ds^Jx3d%g8Cf$hjcLAd88S!GIS@l$paIjExhjh@EkPCyRT<9PSTkX%CYa z7>o6s?b%cga2)Amyw~INjc13*3WrFp0IP}Ir&3ZTB+Pgoqy8iNt*Bdx)q&Ylc^9=! RRFv>I-CWARkQ6Lv3joadD5d}a literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_ind.p7z b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_compressed_ind.p7z new file mode 100644 index 0000000000000000000000000000000000000000..6ec9716b65f03996edbb4aa7fd03dc93c35b9db8 GIT binary patch literal 141 zcmXqLVB^+m^Jx3d%g8Cf$hn}wpn-{z!GIS@l$pbzfsGTYh@EjkgGvJnQ^gz>SMCpK zX%CYa7>o6s?b%cga2)Amyw~INjc13*3WrFp0IP}Ir&3ZTB+Pgoqy8iNt*Bdx)q&Yl Xc^9=!RFv>I-CWARkQ6Lv%YXy`Tiz+& literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_def.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_def.p7m new file mode 100644 index 0000000000000000000000000000000000000000..dad330064d70ea9799d3fcc0bc100dc5dda1fa56 GIT binary patch literal 350 zcmXqLVvJ(r)N1o+`_9YA&b*+B(c7Sj(VdBr!LW%@7|7%YG7TyWiVV38IN6v(S=fY` zT!Rhy4R}Eu4q-Om#FV1U6qo=zT!7Dj2c&{anAO)aq|(_?+CUP-=N1+M^A%kDJW~|{ zib_+H5{nH*41_?E%);ExjtZs4rHMtE`Nd333=4tQz#PZO$jZRd*dS?9zj<%&4CY9| z2R1F~Nfr<5|8yRoIInK?%HMzb`V2n_@Avb&Y23tI{(RHQ%EBA9&t6(b_HmRLt!6m5 zforFBdE~nOcD~8kk<63dK73^qoANfIBIuNSc8;4Z*S<{$a#Av$Evl3Kn#$*WReF_^ zT@5>ntxM@K3Ck&ao%W|Nv3^)@pbd8sJEMUJ8wVupnAur4ejaM^3%>Jrb_0j+q~yPz I7bX7z0GGybVgLXD literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_ind.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_enveloped_ind.p7m new file mode 100644 index 0000000000000000000000000000000000000000..eb182562c8773cd67d588bc299f608514d151530 GIT binary patch literal 356 zcmXqLVB^$k^Jx3d%gD~WpuwPliIKsuiBZ^~iIE@3G^j8rGUPVkWMd9xVH0L@4L0OA z;019wgxP!(Q;ITEU;^xL0X_pBkP0qgR$tGMN@qi914$5{TUZFpS8(z3OjQUdDossF zEH)G|5CTau3v)X=DwGzNCKhGp7c((2ECgBua~LBdD+5bogZZg%MKjk(o2%*Qru($n zUAw>X+qx^}U$a)K|I2?J;<)y9{j0nw{fSPOCVaE!PG%0}aP_^Vykh%>UaPp>NlW^< zdzT-t5xy^1CoZSEer2~?53hBB?VRf&!nt2Vx62~H}-10IkHE@4(*&yY%ILums^5T9FE2+UV-@$*bo2q-E|O-d{_6fqD2Niqv_ zJ3A_r7MCU#W#$(f$cghB8W@-uSQr`^TNoKdiSrtPxTa99K@CmaBxfK4ag%5$#6b!v zsR{)U2P>Ev8Z^#F4kJcZ2Ij_I27|^`S`JUDW!FQ7$o@H8d=7hx^6Xh`Fs+lG_HrtqL-#>q`b8_TN z!F`8}*M2{fvG2iRJ?jwFubfW>KD+A$FrHj0wL9SOY_lJt|4%*tZG1l|QG4aoM8=b< z760tr*qgpGF*7nSE^cCsFlb^70mqxHFbk6b12Ep1LuL6{SeThu9~j7jID9N(EFw1w z4!_QtdGEpA>vN6%vQ3U`SrjH~&^QAmuguao!Jx6XLFDVgtQF<{(HmKMme+ZFVoh@IgYJJt#lFM8Vk+C7zoYnIS$B1pCMgGY9@An^OfotgWhL!m8eqv1@R0x7{%`$uRi<19 zDT&uC*A?{q>)7;>^=$987qJp8k3E@At@z!irOBSFXPvg?G%-m6bGt#sK;&0&v4LF5ENJ{?AOSB%IGGKROC4bDf)zoWtcLu+tZr#wXk=n& zYHVy`5@n!|kYzDcVo~6_QWv;P;jOLGU3F=*^V z=-@YOY%r)pD)AV#3^>@>v|5>&m^k4qQDg;T$Q&jEE;bHG`N+%;OpGu#sAOyaMm>Vh z=wQIk#;VQ7%*2i=u7R9FfQ17~V*~#lhsBSkGX1~JR%KLkzWmO|hTU@-e>@aS&;Pnj z)%BW0YG|qE#Jr!^YVY;sUJq-_d@d%kPx@J{mR!oKU(R>>7a5rbe2bQ3a>>j&?$o_7 zD*d5J(#m&ROD;v}?+nT_&Aqbk&Ht_ghHZiCeuRoDOk`FpJ~|_AScdiXkcJsU}0!vY++;+CC+OE;+jIa1~oKwlbnGJ#7&~15C^ zUY~39mu+%n%c3w@gT@&kd1aQy2?mY54I*C`X00gqkKV}Av%JpZBj5a^W&P3%i_PeYB?``tDDm9H$PDq3AlOG{AV&(Z=o{#^X=jv_6jLnMI7}$dJ$+MUN6O_>cg9UnRIz;JC zF3JIF&q0oQVD?}J#<}J9y}uReWL8&Hq(2O3dzjt!i!aMpXyxv`Gd&MWpX{G^_wKTn zpC=i4v2Z^w$gEI(*#BND^q3ZVVv^C}C@X0f(*R4BfQS6o_J8ZQt1{&>NJ+eAxvrq+ zU&p48tY>?#y@-`)dF;t_YQ^t9Elu`ZJ?peBCzmrl>k@Oe&0INA(KE%Ffx)neN!Fl= zNfMaf4JrmA$ASwE4GT>li(`sdAV&a6eM3EJUA#<1vxY#%#E@fs1CPx?>R5msMBOk$MbTD9N zW7XzkW@1Mb*Fa7qz~X_Wv4MY&!{SF%nf~8qt1_xNUw-Ff!|pkaKOPFE=YQR%>UvEg zHMCT7V&2bdwfFjRuZOi|J{J?&C;hBeOD^TrFXub`i;PSIzC}wixn$-Xcj{gkmHyBq zY2~}EC6}V~cLwE|=3d$N=6}}#!?wV6KSD(nCNe7)ADs}r!RXjRncW}G&hHf5#{dQZ D;e^pj literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.p7m new file mode 100644 index 0000000000000000000000000000000000000000..1f0adb74993727bb73b7782587e0789e8cf73edd GIT binary patch literal 1574 zcmXqLVpC${)N1o+`_9YA&a|M3P1vA`jh~5;(U8}Gn~gJ}&4V$OnT3hbKp3HnaRD0( zOG#=)$$}>4!v;;v`+-=1>+kVJ6pLLw=w< zki#L&=9`#Ol$in(V22Cv8SsEqa0#>edWKXw8%i5Ug81CRLSVjvi=StzLO@YzYEojc zp@@MHNRnBY+u2c}w74{}C^Nsv$-_G5!@Q-@-XEA|8*B2v$oI512)>*2@GR4sGbb$Wm?(!SSIsohvDwB{ z`~LZhos%PH3hp~(y!QKsg1We&u{B@Y!82fbrx~soepGXPf;H{eSBDZ{zz( ziP|ftCNiE>t@vl}#@_UmiJ6gsad8u4gh3Ny2sqwkg;|&k7=Vd^IaHRPg@u`k^?`vb zh{MMs#v*c~;PC6LnfD&-y*}6IFWcnEmPKK*28}a7^2#iY6AT)A8$`Y?%vw?IAH9*K zXL+5+N51(-%lf4k7A+|-$b&^bwNitC0Uspf*@N=)OB9?PQR2CYks0D6L9ma^K#mk* z(Kpa-)6OUplCMcr?1`G7sbcoWOT$BUUo`W3q!0f>cjC0HHdw(m`$*iuZNPig6_AtBc7hjgI z(8}F=XL=r%KG{F-?%icCKTk69V&Q&VkXfPnu>ZYQ=rJw!#3ZA`QC89}rU8~L0T21F z?f=$qS7pj&kdk=Ka$P~szm82GS&4R{X1`_aMgp=72CHikpUY$?%#X58sg-J# ze|4?K=;TF%#y*4&e#6EFgF2*=k5S8jgN;qAm6?f&6V4JvRv?DVVKU%io%W@ke*cb0{?Bi`qiuKZ8{BYBjY%S9# P6`5Hq@yl~-u7v^s<+#mz literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.pem b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.pem new file mode 100644 index 00000000000..34ef17fd53d --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_def.pem @@ -0,0 +1,35 @@ +-----BEGIN PKCS7----- +MIIGIgYJKoZIhvcNAQcCoIIGEzCCBg8CAQExDTALBglghkgBZQMEAgEwEwYJKoZI +hvcNAQcBoAYEBHRleHSgggPDMIIDvzCCAyigAwIBAgICAKEwDQYJKoZIhvcNAQEF +BQAwcjELMAkGA1UEBhMCRVMxDzANBgNVBAgTBk1hZHJpZDEPMA0GA1UEBxMGTWFk +cmlkMQ4wDAYDVQQKEwVNSVR5QzEbMBkGA1UECxMSTUlUeUMgRE5JZSBQcnVlYmFz +MRQwEgYDVQQDEwtDQSB1c3VhcmlvczAeFw0xMDA0MDgxMjM4MjJaFw0yMDA0MDUx +MjM4MjJaMHwxCzAJBgNVBAYTAkVTMQ8wDQYDVQQIEwZNYWRyaWQxDzANBgNVBAcT +Bk1hZHJpZDEOMAwGA1UEChMFTUlUeUMxGzAZBgNVBAsTEk1JVHlDIEROSWUgUHJ1 +ZWJhczEeMBwGA1UEAxMVVXN1YXJpbyBkZSBwcnVlYmFzIDYxMIGfMA0GCSqGSIb3 +DQEBAQUAA4GNADCBiQKBgQC2ehoLcO6lXWmKzJfdz2m+vRZmGeDo5OF+Q8MNdVtL +8AKbMykP6G9JOzBT3WLhzQKszMg43DQjViN6mTQsPLYCfe/n6LmTWZkRvsIzrffM +aL7goy47VCX1CeUQ80cuUAHJpRq7UMObNvgV/8rn+zPfYmErqZVhAckleP4/RgeC +9QIDAQABo4IBWDCCAVQwCQYDVR0TBAIwADALBgNVHQ8EBAMCBeAwHQYDVR0OBBYE +FNhww+tqmd7gvdedMv0Gk1mEolYdMIGYBgNVHSMEgZAwgY2AFPWhaqh3T1uxBIyn +fkjxDp/Fdo8boXKkcDBuMQ8wDQYDVQQIEwZNYWRyaWQxDzANBgNVBAcTBk1hZHJp +ZDEOMAwGA1UEChMFTUlUeUMxGzAZBgNVBAsTEk1JVHlDIEROSWUgUHJ1ZWJhczEQ +MA4GA1UEAxMHUm9vdCBDQTELMAkGA1UEBhMCRVOCAQMwCQYDVR0RBAIwADA2BgNV +HRIELzAthitodHRwOi8vbWluaXN0ZXItOGpneHk5Lm1pdHljLmFnZS9QS0kvQ0Eu +Y3J0MD0GA1UdHwQ2MDQwMqAwoC6GLGh0dHA6Ly9taW5pc3Rlci04amd4eTkubWl0 +eWMuYWdlL1BLSS9jcmwuY3JsMA0GCSqGSIb3DQEBBQUAA4GBADn3vfsgfhyreHhn +4VCG4WuG+g5qTRKpu72ZScMbyY+e3d2m6fOSMkoEC+NwaXgl4Y/vKlXGKgdhYjLD +WjobRDVQOQRQ4Q/Wv/aPPno1CjBkYdY5rnCM/oiy8QXNjdboXRiE40kCyqj7jiop +B20uO2a0yacC5ooWQz1pqZEhSWRDMYICHTCCAhkCAQEweDByMQswCQYDVQQGEwJF +UzEPMA0GA1UECBMGTWFkcmlkMQ8wDQYDVQQHEwZNYWRyaWQxDjAMBgNVBAoTBU1J +VHlDMRswGQYDVQQLExJNSVR5QyBETkllIFBydWViYXMxFDASBgNVBAMTC0NBIHVz +dWFyaW9zAgIAoTALBglghkgBZQMEAgGggfowGAYJKoZIhvcNAQkDMQsGCSqGSIb3 +DQEHATAcBgkqhkiG9w0BCQUxDxcNMTkwMTI0MTM0MjI1WjAvBgkqhkiG9w0BCQQx +IgQgmC2ePrmW9VnmM/TRlN7zdh2Qn1o7ZH0ahR/q1nwyydEwgY4GCSqGSIb3DQEJ +DzGBgDB+MAsGCWCGSAFlAwQBKjAIBgYqhQMCAgkwCAYGKoUDAgIVMAsGCWCGSAFl +AwQBFjALBglghkgBZQMEAQIwCgYIKoZIhvcNAwcwDgYIKoZIhvcNAwICAgCAMA0G +CCqGSIb3DQMCAgFAMAcGBSsOAwIHMA0GCCqGSIb3DQMCAgEoMA0GCSqGSIb3DQEB +AQUABIGAFR6+Q41Ak8HXUNnbkEQDWN3JBacSYi5mAPtHyJNHGLewKTwxOrf/nGhk +Qq5zqvMLhci0NBU3wvW+lAKX3ytavfkRkRFlTruuH151Stkr2TRW6h132ggA2AAJ +3gGxGqN2bYAYB6O8QY4NhmJdSi+j4bK0ayo15HhpagRfp2181lU= +-----END PKCS7----- diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_ind.p7m b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testPKCS7_signed_data_ind.p7m new file mode 100644 index 0000000000000000000000000000000000000000..cf5408395119268812d716a8bec72cedd7bd2181 GIT binary patch literal 1582 zcmXqLVB^$k^Jx3d%gD~OpuwPliILHe*MOUiGoj6cF_oExiO~Q_3FCqWl?E1;lGKV4 z1~6FA#C+JGiFrQ|Yb;=9Vq{`sVpwRv3pbRJm6gGu$dKEBlZ`o)g-w{rHQ10JC=cXt z2($SnrW9qSzy#Rg0(=HMAQfD~tiGNhmClCJ29h8?x3CbHui)b6nW_*_RGONUSZpX_ zAOwp}IwG1b0* z{$l6k$eDus4jHfgekNnzgT;E*A*x?Fp9*|-*9%}gxm0R*z~R|uKSckZdj8w^eo~_L z%BhKrCsix{*}JhfePv>1WMEv}#28`F#25mOH(6m8CIbdwqF@e{=2Yav2HTugoIkII@n5;qL43NAsOXCEC#@+^zuM4wQl>0|-Wa(L6 z=kbwm{?W32>4im03JmgKkx#AEAYi};33>LQ{QMFHXGfHHZenDH_(%}!BQuaAg;?|r zblbEuN=gc>^!0Ny^D>J|Qj2sgveGLmE%kCUODdD~64O)l1H3);ogMX(i%JY^LHgub z%zz2XXo0~3y*3@9bSD?(0JY~JM?ElmFazV<^84Q33UxB8D=N|-2DClQZu`ZT3V{%iZc z_1jgMav7u~Ub9?R(DScj(?`~`z1LpEO0+!oWIDCtcb}Ffd#;{!+Ln{cnVxltIooEg zoT%uT;%wN&Bx}&bBniy@1{DL5U%|x&a;dYR@t1)FycpqRHUvt-3Mxim?t*bRSq=Gt zS>4jW(8$El*u==tILbgDAGTIF9| zt1&ux(V(#pp@ZMBvB97YsRU%yGT>li(`sdAV&a6eM3EJUA#<1vxY#%#E@fs1CPo+= zR5CUIqaML$bTD9NW7XzkW@1Mb*Fa7oz`}v0v4Q8Y>Z`}iO&XVnLDqQ} zle;Q^=t*d?aNSwCDO8-}m`jFM_T^V`rYm36Pkte=Ztn$VrvnRbeR%OZ$|Ne?(LGgO z|8d-!A1;Y8;<6#HM3+r*-Tydwr;E<9i_=@Du^R7qDE21hUY>W@s#CjM@)CZQrT1%Z Q@>4NiJKwde15(-p0FkTIMF0Q* literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testRSAKEYandCERT.p12 b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/resources/test-documents/testRSAKEYandCERT.p12 new file mode 100644 index 0000000000000000000000000000000000000000..1c536e8fbeaa999945c45b22805cccc720882143 GIT binary patch literal 1717 zcmY+DdoYUxT_x3GyGUUqF2z*3_I3J)3fUwZPvizC|V-!L0e?CYEgBS!9cH#(* zuV&Xqu%#{@=x zR$lf#%F|3ZIetT`MYOn^ZoUTASI&nbR=9-_7;sp2qsTiNJ zlJlk>>-Oxotz9m9>$$G!URm`=W0iGoi~=J4s-_Xz0okAZwr;g!_@~b%ahb-ZQ0o3+ zv7p$wMcQgsR1J$Ma_Y`I8Tr#Ykh&kUGU*SL#-H(@X`_Ff>lJ?L>oiT6AvgDo8oGN~ zqsJnIV#mtWTq<)*$E!9+yoSre^0ayqXrSa2<$hDkQ|-v8_Z02N#Xcz8jjmhilQ_xc^-e9zbCMugBz%l zt+<+B8iQMAR!!nxA{Rb*L0JyLC*qk8Q-B4;iodz|q2!Qd!y_F~9;Ck_&*y|lsF%Gr3es(Yd5~)+N=T1kF9+O`wPe-~0p7QFp*PiIp-T>y_|IBWP z$$vek@6<<4Gv{1rTT0}o@{r4DD^rYwY@>ovTu{J>w?D%eCvZ)$)kW|o<^k7_F@Dvb zwVp6FoHJCD##@pE`a5QkDkKZ1l7gG1sT$$xzvh_n&#=2l?|Nq+H%)0L-*gvT)&fJGOUud^4nrdm z9;%X;+^UB-@LEJ@uZomif~YhdYiBDGHPgCp$iVWqcOxnal@Te!z730V_nD}F#~l+jyR5t)|aNVqZU z?5AHtT%ghBEtjCinr)4fvHY_47&0LEACVrB0WOpb%pKrA4`u>{|Era_FmMn)Ie?}A zXB*^;ZEZyX={7o!_%F5{oF@Y>&dupnZLx=VO>lZ6>*6(hgqMnyq+YKxNbu}q!|pE9 z{6U?3?|IdboGAJeJ2U6h?6G^N_rzVNzBl3$w73Cxh*QKPR6FFz*&92p^Fs|*FZCk|OLn9ui5(amdJK0JVZWlV}6 zmdy&k>8@-zYlQ4kN%&(HgYa!AO8oTtOojY<%~EPcY zH|U4CB(Ik8A$uaZS7!qQth-dT^Y;n27zj;m$Fy+S)Z5t%>zLY8>gkKFEL9`{9d%ys zR?BE8kQ?SQN4zN54=f*1Ur_5c%d}+o8=b4#s%j3*kR4?-$-g=KPLibC6??r_>vm$h zv5b1EwQsQBsDq1(S}M$t&AooU!*Y++p@@rqlkejUz6gu$3yEk~Dr|^gb zB&#|~x{sAf<*%qMKs_=XCY5mw=ac&RsYb+2fFh;%x-34IQ?ne|kzW;{b^V?Z-WCbK zJN4MLQ>)A7&F$$kF~Q<%$nk6bd#j~iB-d({@ZG?qgYv}OnY*j2I|_MnC`s5h18dqCJzBk90D}W{GVY05q$Dk2Y8+Y`Y|gHo&16 HerfmLd{-pw literal 0 HcmV?d00001 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java index 04e431361ba..5bf9824140a 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java @@ -672,6 +672,31 @@ public void testPOIFSContainerDetector() throws Exception { } } + @Test + public void testPKCSAndFriends() throws Exception { + assertTypeByNameAndData("Test4.pdf.tsd", "application/timestamped-data"); + + + assertTypeByNameAndData("testPKCS7_certs_only_def.p7c", "application/pkcs7-mime; smime-type=certs-only"); + assertTypeByNameAndData("testPKCS7_certs_only_ind.p7c", "application/pkcs7-mime; smime-type=certs-only"); + assertTypeByNameAndData("testPKCS7_compressed_def_long.p7z", "application/pkcs7-mime; smime-type=compressed-data"); + assertTypeByNameAndData("testPKCS7_compressed_def_short.p7z", "application/pkcs7-mime; smime-type=compressed-data"); + assertTypeByNameAndData("testPKCS7_compressed_ind.p7z", "application/pkcs7-mime; smime-type=compressed-data"); + assertTypeByNameAndData("testPKCS7_signature_def.p7s", "application/pkcs7-signature"); + assertTypeByNameAndData("testPKCS7_signature_ind.p7s", "application/pkcs7-signature"); + + assertTypeByNameAndData("testPKCS7_signed_data_def.p7m", "application/pkcs7-mime; smime-type=signed-data"); + assertTypeByNameAndData("testPKCS7_signed_data_ind.p7m", "application/pkcs7-mime; smime-type=signed-data"); + assertTypeByNameAndData("testPKCS7_enveloped_def.p7m", "application/pkcs7-mime; smime-type=enveloped-data"); + assertTypeByNameAndData("testPKCS7_enveloped_ind.p7m", "application/pkcs7-mime; smime-type=enveloped-data"); + + assertTypeByNameAndData("test.xml.p7m", "application/pkcs7-mime; smime-type=signed-data"); + assertTypeByNameAndData("Test4.pdf.tsd", "application/timestamped-data"); + assertTypeByNameAndData("testDetached.p7s", "application/pkcs7-signature"); + + assertTypeByNameAndData("testRSAKEYandCERT.p12", "application/x-pkcs12"); + } + private long countBytes(InputStream is) throws IOException { int b = is.read(); long len = 0; diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java index 82a9e7df929..b7ee78b9bca 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java @@ -32,13 +32,15 @@ public void testBasic() throws Exception { //integration test Detector detector = TikaConfig.getDefaultConfig().getDetector(); List detectors = ((CompositeDetector) detector).getDetectors(); - assertEquals(7, detectors.size()); + assertEquals(8, detectors.size()); assertEquals("org.gagravarr.tika.OggDetector", detectors.get(0).getClass().getName()); - assertEquals("org.apache.tika.detect.gzip.GZipSpecializationDetector", + assertEquals("org.apache.tika.detect.crypto.ASN1Detector", detectors.get(2).getClass().getName()); + assertEquals("org.apache.tika.detect.gzip.GZipSpecializationDetector", + detectors.get(3).getClass().getName()); assertEquals("org.apache.tika.detect.microsoft.POIFSContainerDetector", - detectors.get(3).getClass().getName()); - assertEquals("org.apache.tika.mime.MimeTypes", detectors.get(6).getClass().getName()); + detectors.get(4).getClass().getName()); + assertEquals("org.apache.tika.mime.MimeTypes", detectors.get(7).getClass().getName()); } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java index f72f7abb7fa..ca807d10aac 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java @@ -1214,14 +1214,14 @@ public void testMIF() throws Exception { @Test public void testPKCSSignatures() throws Exception { // PKCS7 Signed XML files - assertType("application/pkcs7-signature", "testPKCS17Sig.xml.p7m"); - assertType("application/pkcs7-signature", "testPKCS17Sig-v2.xml.p7m"); - assertType("application/pkcs7-signature", "testPKCS17Sig-v3.xml.p7m"); - assertType("application/pkcs7-signature", "testPKCS17Sig-v4.xml.p7m"); - assertTypeByData("application/pkcs7-signature", "testPKCS17Sig.xml.p7m"); - assertTypeByData("application/pkcs7-signature", "testPKCS17Sig-v2.xml.p7m"); - assertTypeByData("application/pkcs7-signature", "testPKCS17Sig-v3.xml.p7m"); - assertTypeByData("application/pkcs7-signature", "testPKCS17Sig-v4.xml.p7m"); + assertType("application/pkcs7-mime", "testPKCS17Sig.xml.p7m"); + assertType("application/pkcs7-mime", "testPKCS17Sig-v2.xml.p7m"); + assertType("application/pkcs7-mime", "testPKCS17Sig-v3.xml.p7m"); + assertType("application/pkcs7-mime", "testPKCS17Sig-v4.xml.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS17Sig.xml.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS17Sig-v2.xml.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS17Sig-v3.xml.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS17Sig-v4.xml.p7m"); } @Test @@ -1386,6 +1386,37 @@ public void testAACDetection() throws Exception { assertTypeByName("audio/x-aac", "x.aac"); } + @Test + public void testPKCS7() throws Exception { + //from rob975 on https://github.com/apache/tika/pull/267 + // application/pkcs7-* media types + // this tests only the magic detector, which is not sufficient. See the ASN1Detector + assertType("application/pkcs7-mime; smime-type=certs-only", "testPKCS7_certs_only_def.p7c"); + assertType("application/pkcs7-mime; smime-type=certs-only", "testPKCS7_certs_only_ind.p7c"); + assertType("application/pkcs7-mime; smime-type=compressed-data", "testPKCS7_compressed_def_long.p7z"); + assertType("application/pkcs7-mime; smime-type=compressed-data", "testPKCS7_compressed_def_short.p7z"); + assertType("application/pkcs7-mime; smime-type=compressed-data", "testPKCS7_compressed_ind.p7z"); + assertType("application/pkcs7-signature", "testPKCS7_signature_def.p7s"); + assertType("application/pkcs7-signature", "testPKCS7_signature_ind.p7s"); + + /* can't distinguish these with file extension and magic + assertType("application/pkcs7-mime; smime-type=signed-data", "testPKCS7_signed_data_def.p7m"); + assertType("application/pkcs7-mime; smime-type=signed-data", "testPKCS7_signed_data_def.pem"); + assertType("application/pkcs7-mime; smime-type=signed-data", "testPKCS7_signed_data_ind.p7m"); + assertType("application/pkcs7-mime; smime-type=enveloped-data", "testPKCS7_enveloped_def.p7m"); + assertType("application/pkcs7-mime; smime-type=enveloped-data", "testPKCS7_enveloped_ind.p7m"); + */ + assertTypeByData("application/pkcs7-mime", "testPKCS7_certs_only_def.p7c"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_certs_only_ind.p7c"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_enveloped_def.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_enveloped_ind.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signature_def.p7s"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signature_ind.p7s"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signed_data_def.p7m"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signed_data_def.pem"); + assertTypeByData("application/pkcs7-mime", "testPKCS7_signed_data_ind.p7m"); + } + private void assertText(byte[] prefix) throws IOException { assertMagic("text/plain", prefix); } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java index 18c13145991..d46105ec4f4 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java @@ -40,6 +40,7 @@ public void testBrokenPdf() throws Exception { //make sure that embedded file appears in list //and make sure embedded exception is recorded List list = getRecursiveMetadata("testTSD_broken_pdf.tsd", parseContext); + debug(list); assertEquals(2, list.size()); assertEquals("application/pdf", list.get(1).get(Metadata.CONTENT_TYPE)); assertNotNull(list.get(1).get(TikaCoreProperties.EMBEDDED_EXCEPTION)); From ae40dde2343014760006e3fd077ebe549f636336 Mon Sep 17 00:00:00 2001 From: tallison Date: Tue, 21 Oct 2025 08:41:51 -0400 Subject: [PATCH 2/3] fix unit test --- .../tika/detect/TestDetectorLoading.java | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java index 93c0a132570..ce367ce250d 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java @@ -18,6 +18,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +import java.util.ArrayList; import java.util.List; import org.junit.jupiter.api.Test; @@ -32,14 +33,20 @@ public void testBasic() throws Exception { //integration test Detector detector = TikaConfig.getDefaultConfig().getDetector(); List detectors = ((CompositeDetector) detector).getDetectors(); + String[] expected = new String[]{ + "org.gagravarr.tika.OggDetector", + "org.apache.tika.detect.MatroskaDetector", + "org.apache.tika.detect.apple.BPListDetector", + "org.apache.tika.detect.crypto.ASN1Detector", + "org.apache.tika.detect.gzip.GZipSpecializationDetector", + "org.apache.tika.detect.microsoft.POIFSContainerDetector", + "org.apache.tika.detect.ole.MiscOLEDetector", + "org.apache.tika.detect.zip.DefaultZipContainerDetector", + "org.apache.tika.mime.MimeTypes" + }; assertEquals(9, detectors.size()); - assertEquals("org.gagravarr.tika.OggDetector", detectors.get(0).getClass().getName()); - assertEquals("org.apache.tika.detect.crypto.ASN1Detector", - detectors.get(2).getClass().getName()); - assertEquals("org.apache.tika.detect.gzip.GZipSpecializationDetector", - detectors.get(3).getClass().getName()); - assertEquals("org.apache.tika.detect.microsoft.POIFSContainerDetector", - detectors.get(4).getClass().getName()); - assertEquals("org.apache.tika.mime.MimeTypes", detectors.get(7).getClass().getName()); + for (int i = 0; i < detectors.size(); i++) { + assertEquals(expected[i], detectors.get(i).getClass().getName()); + } } } From 7a80b85e17b64d44c01f637484a4e308d8547bc1 Mon Sep 17 00:00:00 2001 From: tallison Date: Tue, 21 Oct 2025 09:08:30 -0400 Subject: [PATCH 3/3] checkstyle --- .../test/java/org/apache/tika/detect/TestDetectorLoading.java | 1 - 1 file changed, 1 deletion(-) diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java index ce367ce250d..8b8875a7b63 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java @@ -18,7 +18,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; -import java.util.ArrayList; import java.util.List; import org.junit.jupiter.api.Test;