|
| 1 | +# R scripts to generate figures for the paper "Representations of Machine Vision |
| 2 | +# Technologies in Artworks, Games and Narratives: A Dataset", submitted to the |
| 3 | +# journal Data in Brief in March 2022. |
| 4 | +# |
| 5 | +# The code was written by Jill Walker Rettberg. |
| 6 | +# |
| 7 | +# |
| 8 | + |
| 9 | +# setup------------------------------------------------ |
| 10 | + |
| 11 | +library(tidyverse) |
| 12 | +library(ggthemes) |
| 13 | + |
# Import creativeworks ----------------------------------------------------

# Read data/creativeworks.csv into CreativeWorks.
# WorkID is parsed as an integer and WorkTitle as text; Sentiment, Topic,
# TechRef and TechUsed are parsed as factors with the explicit level sets
# below. Columns not named in col_types are left to readr's type guessing.
# The level vectors live inside local() so they do not leak into the
# global environment.
CreativeWorks <- local({
  sentiment_levels <- c(
    "Exciting", "Flawed", "Helpful", "Neutral", "Wondrous",
    "Hostile", "Oppressive", "Alien", "Creepy", "Subversive",
    "Dangerous", "Intrusive", "Empowering", "Protective",
    "Intimate", "Misleading", "Fun", "Overwhelming",
    "Prosocial", "Disgusting"
  )

  topic_levels <- c(
    "Nudity", "Social Media", "Romantic relationship", "Climate Change",
    "Dystopian", "Horror", "Robots/androids", "Surveillance", "Automation",
    "City", "Labour", "War", "Identity", "AI", "Animals", "Consciousness",
    "Nature", "Companionship", "Competition", "Playful", "Family",
    "Free will", "Physical violence", "Crime", "Hacking", "Conflict",
    "Empathy", "Utopian", "Race", "Sex", "Cyborgs", "Inequality",
    "Economy", "Grief", "Autonomous vehicles", "Gender"
  )

  # TechRef and TechUsed use exactly the same vocabulary, so it is
  # written once and passed to both column specifications.
  technology_levels <- c(
    "Holograms", "Augmented reality", "Ocular implant",
    "Emotion recognition", "Surveillance cameras", "AI",
    "Virtual reality", "Motion tracking", "Body scans",
    "Drones", "MicroscopeOrTelescope", "Biometrics",
    "Image generation", "Facial recognition",
    "Object recognition", "3D scans", "Machine learning",
    "Filtering", "Deepfake", "Camera", "Cameraphone",
    "Interactive panoramas", "Non-Visible Spectrum", "UGV",
    "Webcams", "Satellite images"
  )

  read_csv(
    "data/creativeworks.csv",
    col_types = cols(
      WorkID = col_integer(),
      WorkTitle = col_character(),
      Sentiment = col_factor(levels = sentiment_levels),
      Topic = col_factor(levels = topic_levels),
      TechRef = col_factor(levels = technology_levels),
      TechUsed = col_factor(levels = technology_levels)
    )
  )
})
| 54 | + |
| 55 | + |
# FIGURE 1: Line graph showing genres by year -------------------------------------

# One line per genre showing how many distinct works fall in each year,
# restricted to years after 2000.
CreativeWorks %>%
  # Keep one row per WorkID/Genre/Year combination so repeated rows for the
  # same work are counted only once.
  distinct(WorkID, Genre, Year) %>%
  filter(Year > 2000) %>%
  # geom_line() draws with colour only, so Genre is mapped to colour alone;
  # a fill mapping would be ignored by this geom.
  ggplot(aes(x = Year, colour = Genre)) +
  geom_line(stat = "count") +
  scale_colour_tableau() +
  labs(
    # The legend shown belongs to the colour scale, so that is the title
    # that has to be blanked (labs(fill = "") had no visible effect).
    colour = "",
    # title = "Year of publication or first release for creative works in the machine vision dataset",
    y = "",
    x = ""
  ) +
  theme_minimal()
| 70 | + |
# FIGURE 2: Plot geographic distribution --------------------------------------------

# See https://cran.r-project.org/web/packages/countrycode/countrycode.pdf
# for a description of the country groupings this package offers.
# destination = "region" sorts countries into the 7 regions defined by
# the World Bank Development Indicators.
library(countrycode)

# Derive the region from the country name and store it as a factor in a
# single step. The explicit level vector fixes the level order of the
# Continent factor.
CreativeWorks[["Continent"]] <- factor(
  countrycode(
    sourcevar = CreativeWorks[["Country"]],
    origin = "country.name",
    destination = "region"
  ),
  levels = c(
    "North America", "Europe & Central Asia",
    "East Asia & Pacific", "Latin America & Caribbean",
    "Sub-Saharan Africa", "Middle East & North Africa",
    "South Asia"
  )
)
| 91 | + |
# Grouping regions with few cases, then plotting.
#
# Bar chart of the number of distinct works per (grouped) region, with one
# facet per genre and the count printed inside each bar.
CreativeWorks %>%
  # One row per WorkID/Genre/Continent combination.
  distinct(WorkID, Genre, Continent) %>%
  # Merge the two African/Middle Eastern regions and the two Asian regions
  # into one level each.
  mutate(Continent = recode(Continent,
    "Middle East & North Africa" = "Middle East & Africa",
    "Sub-Saharan Africa" = "Middle East & Africa",
    "East Asia & Pacific" = "East & South Asia & Pacific",
    "South Asia" = "East & South Asia & Pacific"
  )) %>%
  # fct_infreq() orders the bars from most to least frequent region.
  ggplot(aes(x = fct_infreq(Continent), fill = Genre)) +
  geom_bar() +
  geom_text(
    stat = "count",
    # after_stat(count) replaces the deprecated ..count.. notation.
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5),
    size = 3,
    colour = "white"
  ) +
  scale_fill_tableau() +
  labs(
    # title = "Geographic distribution of creative works in the machine vision dataset",
    # subtitle = "Note that some works are affiliated with several countries.",
    y = "Number of works",
    x = ""
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  # scales = "free_x" lets every genre facet have its own x axis.
  facet_wrap(~Genre, scales = "free_x")
| 119 | + |
| 120 | + |
# Stacked barchart of geographic distribution by continent ----------------

# One bar per (grouped) region, stacked by genre, with the count of
# distinct works printed in the middle of every segment.
CreativeWorks %>%
  # One row per WorkID/Genre/Continent combination.
  distinct(WorkID, Genre, Continent) %>%
  # Merge the two African/Middle Eastern regions and the two Asian regions
  # into one level each.
  mutate(Continent = recode(Continent,
    "Middle East & North Africa" = "Middle East & Africa",
    "Sub-Saharan Africa" = "Middle East & Africa",
    "East Asia & Pacific" = "East & South Asia & Pacific",
    "South Asia" = "East & South Asia & Pacific"
  )) %>%
  # Genre is used for the fill directly; reshaping with pivot_longer() only
  # renamed the column to `value` and is not needed.
  ggplot(aes(x = Continent, fill = Genre)) +
  geom_bar() +
  geom_text(
    stat = "count",
    # after_stat(count) replaces the deprecated ..count.. notation.
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5),
    size = 4,
    colour = "white"
  ) +
  scale_fill_tableau() +
  labs(
    fill = "",
    title = "Geographic distribution of creative works in the machine vision dataset",
    subtitle = "Note that some works are affiliated with several countries.",
    y = "",
    x = ""
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
| 148 | + |
| 149 | + |
# Distribution by country in Europe ---------------------------------------

# Country is intentionally left untouched here. fct_infreq() accepts a
# character vector and converts it to a factor itself, so no levels need to
# be set beforehand. (Assigning with levels<- on a character column does not
# create a factor; it only attaches a stray "levels" attribute.)

# Plot barchart for Europe and Central Asia: one bar per country, ordered
# by frequency and stacked by genre, with counts printed in the segments.
CreativeWorks %>%
  # One row per WorkID/Genre/Continent/Country combination.
  distinct(WorkID, Genre, Continent, Country) %>%
  filter(Continent == "Europe & Central Asia") %>%
  # Genre is used for the fill directly; reshaping with pivot_longer() only
  # renamed the column to `value` and is not needed.
  ggplot(aes(x = fct_infreq(Country), fill = Genre)) +
  geom_bar() +
  geom_text(
    stat = "count",
    # after_stat(count) replaces the deprecated ..count.. notation.
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5),
    size = 3,
    colour = "white"
  ) +
  scale_fill_tableau() +
  labs(
    fill = "",
    title = "European and Central Asian works in the Machine Vision dataset",
    subtitle = "Note that some works are affiliated with several countries.",
    y = "",
    x = ""
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
| 178 | + |
| 179 | + |
# UK vs everything else ---------------------------------------------------

# Add the UN sub-region name for every country; it is the fallback
# grouping for all countries not handled explicitly below.
CreativeWorks$Region <- countrycode(
  sourcevar = CreativeWorks$Country,
  origin = "country.name",
  destination = "un.regionsub.name"
)

# Bar chart of works in Europe & Central Asia (Kazakhstan excluded),
# grouped into "UK & Ireland", "Russia" and the UN sub-region for every
# other country, stacked by genre.
CreativeWorks %>%
  # One row per WorkID/Genre/Continent/Region/Country combination.
  distinct(WorkID, Genre, Continent, Region, Country) %>%
  filter(Continent == "Europe & Central Asia" & Country != "Kazakhstan") %>%
  mutate(Region1 = recode(Country,
    "United Kingdom" = "UK & Ireland",
    "Ireland" = "UK & Ireland",
    # Mapping Russia to itself keeps it as its own group instead of
    # falling through to its UN sub-region.
    "Russia" = "Russia",
    .default = Region
  )) %>%
  # Genre is used for the fill directly; reshaping with pivot_longer() only
  # renamed the column to `value` and is not needed.
  ggplot(aes(x = fct_infreq(Region1), fill = Genre)) +
  geom_bar() +
  geom_text(
    stat = "count",
    # after_stat(count) replaces the deprecated ..count.. notation.
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5),
    size = 3,
    colour = "white"
  ) +
  scale_fill_tableau() +
  labs(
    fill = "",
    title = "European and Central Asian works in the Machine Vision dataset",
    subtitle = "Note that some works are affiliated with several countries.",
    y = "",
    x = ""
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
| 213 | + |
| 214 | + |
# Inspect the distinct WorkID/Genre/Continent/Region/Country rows for
# Europe & Central Asia in the data viewer. This is an inspection aid only;
# it does not feed into any figure.
# NOTE(review): view() is presumably tibble::view() loaded via tidyverse;
# confirm if this script is meant to run non-interactively.
view(
  CreativeWorks %>%
    select(WorkID, Genre, Continent, Region, Country) %>%
    distinct() %>%
    filter(Continent == "Europe & Central Asia")
)
| 220 | + |
# Distribution by year ----------------------------------------------------

# Stacked bar chart of the number of distinct works per year (after 2000),
# filled by genre, with the count printed in the middle of every segment.
CreativeWorks %>%
  # One row per WorkID/Genre/Year combination.
  distinct(WorkID, Genre, Year) %>%
  filter(Year > 2000) %>%
  ggplot(aes(x = Year, fill = Genre)) +
  # The layer inherits x and fill from ggplot(); repeating the same
  # mapping inside geom_bar() was redundant.
  geom_bar() +
  geom_text(
    stat = "count",
    # after_stat(count) replaces the deprecated ..count.. notation.
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5),
    size = 3,
    colour = "white"
  ) +
  scale_fill_tableau() +
  labs(
    fill = "",
    title = "Year of publication or first release for creative works in the machine vision dataset",
    y = "",
    x = ""
  )
| 239 | + |
| 240 | + |
| 241 | + |
0 commit comments