Skip to content
This repository was archived by the owner on Oct 11, 2023. It is now read-only.

Commit e1d56ca

Browse files
committed
Naive approach to zip/gzip sampledata
1 parent 9809eae commit e1d56ca

File tree

4 files changed

+62
-19
lines changed

4 files changed

+62
-19
lines changed

sampledata/src/main/scala/SampleData.scala

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package io.continuum.bokeh
22
package sampledata
33

44
import java.io.{File,InputStream,FileInputStream,InputStreamReader,FileNotFoundException}
5-
import java.util.zip.GZIPInputStream
5+
import java.util.zip.{ZipInputStream,GZIPInputStream}
66
import java.net.URL
77

88
import scala.collection.JavaConverters._
@@ -16,6 +16,21 @@ import net.fortuna.ical4j.model.{Calendar,Component}
1616
import net.fortuna.ical4j.model.component.VEvent
1717
import net.fortuna.ical4j.data.CalendarBuilder
1818

19+
object FileName {
  import scala.language.implicitConversions

  /** Lets callers keep passing a bare `String`; it is treated as an uncompressed [[Simple]] file. */
  implicit def stringToFileName(fileName: String): FileName = Simple(fileName)
}

/** Logical name of a sample-data file together with the name it is stored under. */
sealed trait FileName {
  /** Logical name of the data file (for archives: the name of the wanted content). */
  val name: String

  /** Name under which the file is actually looked up; defaults to `name`. */
  def realName: String = name
}

/** A file stored as-is. */
case class Simple(name: String) extends FileName

/** A file stored gzip-compressed as `name` + ".gz". */
case class GZip(name: String) extends FileName {
  override def realName: String = name + ".gz"
}

/** A file stored inside a zip archive named after `name` with its extension replaced by ".zip". */
case class Zip(name: String) extends FileName {
  override def realName: String = {
    // Only a dot inside the last path segment (and not leading it) marks an extension.
    // Without this guard a dot-less name made `substring(0, -1)` throw, and a dot in a
    // directory part ("dir.v1/data") truncated the path.
    val baseStart = name.lastIndexOf('/') + 1
    val dot = name.lastIndexOf('.')
    val stem = if (dot > baseStart) name.substring(0, dot) else name
    stem + ".zip"
  }
}
33+
1934
object SampleData {
2035
lazy val dataPath: Path = {
2136
val home = Path.fromString(System.getProperty("user.home"))
@@ -24,37 +39,65 @@ object SampleData {
2439
path
2540
}
2641

27-
/** Opens `fileName.realName` as a classpath resource.
  *
  * @return the resource stream, or `None` when the class loader finds no such resource
  */
def getStreamFromResources(fileName: FileName): Option[InputStream] = {
  val loader = getClass.getClassLoader
  val resource = loader.getResourceAsStream(fileName.realName)
  // getResourceAsStream signals "not found" with null; Option(...) maps that to None.
  Option(resource)
}
3045

31-
/** Opens `fileName.realName` from the local data directory, calling `download` when the
  * file is not there yet.
  *
  * @return a stream over the local file, or `None` when no file could be obtained
  */
def getStreamFromFile(fileName: FileName): Option[InputStream] = {
  val cached = dataPath / fileName.realName
  val localFile =
    if (!cached.exists) download(fileName)
    else cached.fileOption
  for (file <- localFile) yield new FileInputStream(file)
}
3651

37-
/** Locates the raw (still compressed, if applicable) stream for `fileName`:
  * classpath resources first, then the local data directory / download.
  */
def getFileStream(fileName: FileName): Option[InputStream] = {
  getStreamFromResources(fileName) match {
    case bundled @ Some(_) => bundled
    // Only touch the file system / network when the resource lookup came up empty.
    case None => getStreamFromFile(fileName)
  }
}
4055

41-
/** Locates `fileName` via `getFileStream` and wraps it in a gzip-decompressing stream.
  *
  * The `GZIPInputStream` constructor reads the gzip header eagerly and throws when the
  * data is truncated or not gzip at all; in that case the underlying stream is closed
  * before the exception is rethrown, so it does not leak.
  *
  * @return the decompressing stream, or `None` when the file cannot be located
  */
def getGZipStream(fileName: FileName): Option[InputStream] = {
  getFileStream(fileName).map { raw =>
    try new GZIPInputStream(raw)
    catch {
      case e: java.io.IOException =>
        // Best-effort close; the header failure is the error worth reporting.
        try raw.close() catch { case _: java.io.IOException => () }
        throw e
    }
  }
}
4459

45-
def getStream(fileName: String): InputStream = {
46-
getFileStream(fileName) orElse getGZipStream(fileName) getOrElse {
47-
throw new FileNotFoundException(s"can't locate $fileName(.gz) in resources, .bokeh/data or S3")
60+
/** Locates the zip archive for `fileName` and positions a stream at the entry whose
  * name equals `fileName.name` exactly (directory entries are ignored).
  *
  * The archive stream is closed when no such entry exists or when scanning the archive
  * fails, so the only stream left open is the one handed back to the caller.
  *
  * @return a stream positioned at the start of the matching entry's data, or `None`
  *         when the archive cannot be located or does not contain the entry
  */
def getZipStream(fileName: FileName): Option[InputStream] = {
  getFileStream(fileName).flatMap { stream =>
    val zip = new ZipInputStream(stream)

    // Advances entry by entry; getNextEntry() closes the previous entry itself and
    // returns null at the end of the archive.
    @scala.annotation.tailrec
    def seek(): Boolean = Option(zip.getNextEntry()) match {
      case None => false
      case Some(entry) if !entry.isDirectory && entry.getName == fileName.name => true
      case Some(_) => seek()
    }

    val found =
      try seek()
      catch {
        case e: java.io.IOException =>
          // Corrupt or unreadable archive: release the stream before propagating.
          try zip.close() catch { case _: java.io.IOException => () }
          throw e
      }

    if (found) {
      Some(zip)
    } else {
      zip.close()
      None
    }
  }
}
82+
83+
/** Opens a stream over the content of `fileName`, picking the lookup that matches
  * how the file is stored (plain, gzip or zip).
  *
  * @throws FileNotFoundException when the corresponding lookup yields no stream
  */
def getStream(fileName: FileName): InputStream = {
  val located: Option[InputStream] = fileName match {
    case _: Simple => getFileStream(fileName)
    case _: GZip   => getGZipStream(fileName)
    case _: Zip    => getZipStream(fileName)
  }

  located match {
    case Some(stream) => stream
    case None =>
      throw new FileNotFoundException(s"can't locate ${fileName.name} in resources, .bokeh/data or S3")
  }
}
5093

5194
// Base URL of the remote sample-data location; `download` resolves file names against it.
val dataUrl = new URL("https://s3.amazonaws.com/bokeh_data/")
5295

53-
def download(fileName: String): Option[File] = {
54-
val url = new URL(dataUrl, fileName)
96+
def download(fileName: FileName): Option[File] = {
97+
val url = new URL(dataUrl, fileName.realName)
5598

5699
val input = url.asInput
57-
val output = dataPath / fileName
100+
val output = dataPath / fileName.realName
58101

59102
input.size match {
60103
case Some(size) =>
@@ -70,15 +113,15 @@ object SampleData {
70113
/** Base trait for sample data sets; declares no members of its own. */
trait SampleData
71114

72115
/** Mixin for sample data sets that are stored as CSV files. */
trait CSVSampleData extends SampleData {
  /** Reads every row of the CSV file `fileName` into memory, trimming each field.
    *
    * The reader (and with it the underlying stream obtained from
    * `SampleData.getStream`) is closed once all rows have been read, or when
    * reading fails.
    *
    * @param fileName file to load; a plain `String` is accepted through the implicit
    *                 conversion to `FileName`
    * @return all rows, each as a list of trimmed fields
    */
  protected def loadRows(fileName: FileName): List[List[String]] = {
    // NOTE(review): no charset is given, so the platform default is used — confirm
    // the data files are safe to decode that way.
    val input = new InputStreamReader(SampleData.getStream(fileName))
    try {
      // Trailing `1`: presumably the number of leading lines (the header) to skip.
      val reader = new CSVReader(input, ',', '"', '\\', 1)
      // Fully materialised into Lists here, so closing the input afterwards is safe.
      reader.readAll().asScala.map(_.map(_.trim).toList).toList
    } finally {
      input.close()
    }
  }
}
79122

80123
trait ICalSampleData {
81-
protected def loadEvents(fileName: String): List[VEvent] = {
124+
protected def loadEvents(fileName: FileName): List[VEvent] = {
82125
val input = SampleData.getStream(fileName)
83126
val builder = new CalendarBuilder()
84127
val calendar = builder.build(input)

sampledata/src/main/scala/USCounties.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ object USCounties extends CSVSampleData {
55
type Value = Map[(Int, Int), USCountyData]
66

77
def load(): Value = {
8-
loadRows("US_Counties.csv").collect {
8+
loadRows(Zip("US_Counties.csv")).collect {
99
case List(name, _, _, USState(state), geom, _, _, _, _, stateId, countyId, _, _) =>
1010
val coords = xml.XML.loadString(geom) \\ "outerBoundaryIs" \ "LinearRing" \ "coordinates" head
1111
val Array(lons, lats) = coords.text.split(" ").map(_.split(",").map(_.toDouble)).transpose

sampledata/src/main/scala/USStates.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ object USStates extends CSVSampleData {
55
type Value = Map[USState, USStateData]
66

77
def load(): Value = {
8-
loadRows("US_States.csv").collect {
8+
loadRows(GZip("US_States.csv")).collect {
99
case List(region, name, USState(state), geom, _) =>
1010
val coords = xml.XML.loadString(geom) \\ "outerBoundaryIs" \ "LinearRing" \ "coordinates"
1111
val Array(lons, lats, _) = coords.head.text.split(" ").map(_.split(",").map(_.toDouble)).transpose

sampledata/src/main/scala/WorldCities.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package sampledata
33

44
object WorldCities extends CSVSampleData {
55
/** Loads the rows of `world_cities.csv` (stored inside a zip archive) and converts each
  * three-field row into a `WorldCity`, parsing the second and third fields as doubles.
  */
def load(): List[WorldCity] = {
  val rows = loadRows(Zip("world_cities.csv"))
  rows.map {
    case name :: lat :: lng :: Nil => WorldCity(name, lat.toDouble, lng.toDouble)
  }
}

0 commit comments

Comments
 (0)