diff --git a/runner/runner.go b/runner/runner.go index 6182d396..2a202652 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -150,8 +150,11 @@ func New(options *Options) (*Runner, error) { } if options.StoreResponseDir != "" { - _ = os.RemoveAll(filepath.Join(options.StoreResponseDir, "response", "index.txt")) - _ = os.RemoveAll(filepath.Join(options.StoreResponseDir, "screenshot", "index_screenshot.txt")) + // Don't remove index files if skip-dedupe is enabled (we want to append, not truncate) + if !options.SkipDedupe { + _ = os.RemoveAll(filepath.Join(options.StoreResponseDir, "response", "index.txt")) + _ = os.RemoveAll(filepath.Join(options.StoreResponseDir, "screenshot", "index_screenshot.txt")) + } } httpxOptions := httpx.DefaultOptions @@ -499,15 +502,23 @@ func (r *Runner) prepareInputPaths() { } } +var duplicateTargetErr = errors.New("duplicate target") + func (r *Runner) prepareInput() { var numHosts int // check if input target host(s) have been provided if len(r.options.InputTargetHost) > 0 { for _, target := range r.options.InputTargetHost { - expandedTarget, _ := r.countTargetFromRawTarget(target) - if expandedTarget > 0 { + expandedTarget, err := r.countTargetFromRawTarget(target) + if err == nil && expandedTarget > 0 { numHosts += expandedTarget - r.hm.Set(target, nil) //nolint + r.hm.Set(target, []byte("1")) //nolint + } else if r.options.SkipDedupe && errors.Is(err, duplicateTargetErr) { + if v, ok := r.hm.Get(target); ok { + cnt, _ := strconv.Atoi(string(v)) + _ = r.hm.Set(target, []byte(strconv.Itoa(cnt+1))) + numHosts += 1 + } } } } @@ -665,10 +676,16 @@ func (r *Runner) loadAndCloseFile(finput *os.File) (numTargets int, err error) { for scanner.Scan() { target := strings.TrimSpace(scanner.Text()) // Used just to get the exact number of targets - expandedTarget, _ := r.countTargetFromRawTarget(target) - if expandedTarget > 0 { + expandedTarget, err := r.countTargetFromRawTarget(target) + if err == nil && expandedTarget > 0 { numTargets += expandedTarget - r.hm.Set(target, nil) //nolint + r.hm.Set(target, []byte("1")) //nolint + } else if r.options.SkipDedupe && errors.Is(err, duplicateTargetErr) { + if v, ok := r.hm.Get(target); ok { + cnt, _ := strconv.Atoi(string(v)) + _ = r.hm.Set(target, []byte(strconv.Itoa(cnt+1))) + numTargets += 1 + } } } err = finput.Close() @@ -679,8 +696,9 @@ func (r *Runner) countTargetFromRawTarget(rawTarget string) (numTargets int, err if rawTarget == "" { return 0, nil } + if _, ok := r.hm.Get(rawTarget); ok { - return 0, nil + return 0, duplicateTargetErr } expandedTarget := 0 @@ -911,7 +929,8 @@ func (r *Runner) RunEnumeration() { gologger.Fatal().Msgf("Could not create response directory '%s': %s\n", responseDirPath, err) } indexPath := filepath.Join(responseDirPath, "index.txt") - if r.options.Resume { + // Append if resume is enabled or skip-dedupe is enabled (never truncate with -sd) + if r.options.Resume || r.options.SkipDedupe { indexFile, err = os.OpenFile(indexPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) } else { indexFile, err = os.Create(indexPath) @@ -925,7 +944,8 @@ func (r *Runner) RunEnumeration() { if r.options.Screenshot { var err error indexScreenshotPath := filepath.Join(r.options.StoreResponseDir, "screenshot", "index_screenshot.txt") - if r.options.Resume { + // Append if resume is enabled or skip-dedupe is enabled (never truncate with -sd) + if r.options.Resume || r.options.SkipDedupe { indexScreenshotFile, err = os.OpenFile(indexScreenshotPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) } else { indexScreenshotFile, err = os.Create(indexScreenshotPath) @@ -1117,10 +1137,8 @@ func (r *Runner) RunEnumeration() { // store responses or chain in directory if resp.Err == nil { URL, _ := urlutil.Parse(resp.URL) - domainFile := resp.Method + ":" + URL.EscapedString() - hash := hashes.Sha1([]byte(domainFile)) - domainResponseFile := fmt.Sprintf("%s.txt", hash) - screenshotResponseFile := fmt.Sprintf("%s.png", hash) + domainResponseFile := fmt.Sprintf("%s.txt", resp.FileNameHash) + screenshotResponseFile := fmt.Sprintf("%s.png", resp.FileNameHash) hostFilename := strings.ReplaceAll(URL.Host, ":", "_") domainResponseBaseDir := filepath.Join(r.options.StoreResponseDir, "response") domainScreenshotBaseDir := filepath.Join(r.options.StoreResponseDir, "screenshot") @@ -1320,14 +1338,28 @@ func (r *Runner) RunEnumeration() { } } - if len(r.options.requestURIs) > 0 { - for _, p := range r.options.requestURIs { - scanopts := r.scanopts.Clone() - scanopts.RequestURI = p - r.process(k, wg, r.hp, protocol, scanopts, output) + runProcess := func(times int) { + for i := 0; i < times; i++ { + if len(r.options.requestURIs) > 0 { + for _, p := range r.options.requestURIs { + scanopts := r.scanopts.Clone() + scanopts.RequestURI = p + r.process(k, wg, r.hp, protocol, scanopts, output) + } + } else { + r.process(k, wg, r.hp, protocol, &r.scanopts, output) + } } - } else { - r.process(k, wg, r.hp, protocol, &r.scanopts, output) + } + + if r.options.Stream { + runProcess(1) + } else if v, ok := r.hm.Get(k); ok { + cnt, err := strconv.Atoi(string(v)) + if err != nil || cnt <= 0 { + cnt = 1 + } + runProcess(cnt) } return nil @@ -2219,7 +2251,7 @@ retry: domainResponseBaseDir := filepath.Join(scanopts.StoreResponseDirectory, "response") responseBaseDir := filepath.Join(domainResponseBaseDir, hostFilename) - var responsePath string + var responsePath, fileNameHash string // store response if scanopts.StoreResponse || scanopts.StoreChain { if r.options.OmitBody { @@ -2240,9 +2272,33 @@ retry: data = append(data, []byte("\n\n\n")...) data = append(data, []byte(fullURL)...) _ = fileutil.CreateFolder(responseBaseDir) - writeErr := os.WriteFile(responsePath, data, 0644) - if writeErr != nil { - gologger.Error().Msgf("Could not write response at path '%s', to disk: %s", responsePath, writeErr) + + basePath := strings.TrimSuffix(responsePath, ".txt") + var idx int + for idx = 0; ; idx++ { + targetPath := responsePath + if idx > 0 { + targetPath = fmt.Sprintf("%s_%d.txt", basePath, idx) + } + f, err := os.OpenFile(targetPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644) + if err == nil { + _, writeErr := f.Write(data) + _ = f.Close() + if writeErr != nil { + gologger.Error().Msgf("Could not write to '%s': %s", targetPath, writeErr) + } + break + } + if !os.IsExist(err) { + gologger.Error().Msgf("Failed to create file '%s': %s", targetPath, err) + break + } + } + + if idx == 0 { + fileNameHash = hash + } else { + fileNameHash = fmt.Sprintf("%s_%d", hash, idx) } } @@ -2433,6 +2489,7 @@ retry: RequestRaw: requestDump, Response: resp, FaviconData: faviconData, + FileNameHash: fileNameHash, CPE: cpeMatches, WordPress: wpInfo, } diff --git a/runner/runner_test.go b/runner/runner_test.go index a6f94826..10b8320b 100644 --- a/runner/runner_test.go +++ b/runner/runner_test.go @@ -7,6 +7,7 @@ import ( "testing" "time" + "github.com/pkg/errors" _ "github.com/projectdiscovery/fdmax/autofdmax" "github.com/projectdiscovery/httpx/common/httpx" "github.com/projectdiscovery/mapcidr/asn" @@ -154,7 +155,9 @@ func TestRunner_asn_targets(t *testing.T) { } func TestRunner_countTargetFromRawTarget(t *testing.T) { - options := &Options{} + options := &Options{ + SkipDedupe: false, + } r, err := New(options) require.Nil(t, err, "could not create httpx runner") @@ -169,7 +172,7 @@ func TestRunner_countTargetFromRawTarget(t *testing.T) { err = r.hm.Set(input, nil) require.Nil(t, err, "could not set value to hm") got, err = r.countTargetFromRawTarget(input) - require.Nil(t, err, "could not count targets") + require.True(t, errors.Is(err, duplicateTargetErr), "expected duplicate target error") require.Equal(t, expected, got, "got wrong output") input = "173.0.84.0/24" diff --git a/runner/types.go b/runner/types.go index 9c169843..4cde28ba 100644 --- a/runner/types.go +++ b/runner/types.go @@ -102,6 +102,7 @@ type Result struct { Response *httpx.Response `json:"-" csv:"-" mapstructure:"-"` FaviconData []byte `json:"-" csv:"-" mapstructure:"-"` Trace *retryablehttp.TraceInfo `json:"trace,omitempty" csv:"-" mapstructure:"trace"` + FileNameHash string `json:"-" csv:"-" mapstructure:"-"` CPE []CPEInfo `json:"cpe,omitempty" csv:"cpe" mapstructure:"cpe"` WordPress *WordPressInfo `json:"wordpress,omitempty" csv:"wordpress" mapstructure:"wordpress"` }