@@ -23,6 +23,36 @@ namespace :backfill do
2323 end
2424 end
2525
# Backfills `word_count_cache` for the most frequently repeated Attribute
# values that do not have a cached count yet.
#
# Each distinct value is analyzed once, then every uncached Attribute row
# sharing that value is updated in bulk, one UPDATE per distinct word count.
# Only the 500 most common uncached values are handled per run.
desc "Backfill cached word counts on the most commonly used attribute values"
task most_used_attribute_word_counts: :environment do
  # The analyzer options are the same for every value, so build it once.
  # NOTE(review): assumes Counter#count keeps no state between calls — confirm.
  counter = WordCountAnalyzer::Counter.new(
    ellipsis: 'no_special_treatment',
    hyperlink: 'count_as_one',
    contraction: 'count_as_one',
    hyphenated_word: 'count_as_one',
    date: 'no_special_treatment',
    number: 'count',
    numbered_list: 'ignore',
    xhtml: 'remove',
    forward_slash: 'count_as_multiple_except_dates',
    backslash: 'count_as_one',
    dotted_line: 'ignore',
    dashed_line: 'ignore',
    underscore: 'ignore',
    stray_punctuation: 'ignore'
  )

  # Maps each distinct value to the number of uncached rows holding it,
  # most common first. `count_id` is presumably the alias ActiveRecord gives
  # the COUNT(id) aggregate produced by `count(:id)` — verify on upgrade.
  usage_by_value = Attribute.where(word_count_cache: nil)
                            .group(:value)
                            .order('count_id DESC')
                            .limit(500)
                            .count(:id)

  # word count => values with that count; block default gives each key its own array.
  values_by_word_count = Hash.new { |hash, key| hash[key] = [] }

  usage_by_value.each do |value, usages|
    word_count = counter.count(value)
    values_by_word_count[word_count] << value
    puts "#{value} x #{usages} : #{word_count} words"
  end

  # Still scoped to uncached rows so existing caches are never overwritten.
  values_by_word_count.each do |word_count, values|
    Attribute.where(word_count_cache: nil, value: values).update_all(word_count_cache: word_count)
  end
end
55+
2656 desc "Backfill cached word counts on all documents"
2757 task document_word_count_caches : :environment do
2858 Document . where ( cached_word_count : nil ) . where . not ( body : [ nil , "" ] ) . find_each ( batch_size : 500 ) do |document |
0 commit comments