The problem is that we’re using the HTTP directory listing, which I now realise will cause trouble for wget, curl and co. What you need to do is get the text of the directory listing, convert it into a list of file names, and then download each of those files. I know this is tedious, but you should be able to figure it out.
Here’s an example of how you can get the file names and download them using R.
[code]# script to demonstrate downloading NWTC met tower data using R

# START OF INPUTS
# get the location of the files online (only need the root directory).
# NOTE: the host is assumed here; the listing's path is /MetData/135mData/M5Twr/20Hz/mat
NREL.URL.Base <- "http://wind.nrel.gov/MetData/135mData/M5Twr/20Hz/mat"
# and define the location we will write to locally (again, only the root)
Local.URL.Base <- "~/Documents/temp"
# define the years, months and days we want to download
my.years = c("2011","2012")
my.months = c("01","02","03","04","05","06","07","08","09","10","11","12")
my.days = c("01","02")
# END OF INPUTS

# load packages
require(RCurl)

# define the connection we will use to the NREL database
NREL.con = getCurlHandle(ftp.use.epsv = FALSE,
                         maxconnects = 1,
                         fresh.connect = 0,
                         timeout = 60,
                         useragent = "R")

# loop through the years, months and days we defined
for (year in my.years){
for (month in my.months){
for (day in my.days){
date.path <- paste(year, "/",
                   month, "/",
                   day, "/",
                   sep = "")
# make the URL we want to check
source.file.path <- paste(NREL.URL.Base, "/",
                          date.path,
                          sep = "")
if (url.exists(url = source.file.path)){
# get a file listing
source.listing <- unlist(strsplit(getURL(source.file.path,
                                         curl = NREL.con,
                                         verbose = FALSE),
                                  "\n"))
# scrape that listing into a list of files
# each file name is 18 digits/underscores followed by ".mat",
# sitting immediately before the "<" of the closing HTML tag
matches <- regexpr(pattern = "#?[0-9\\_]{18}\\.mat(?=<)",
                   text = source.listing,
                   perl = TRUE)
mat.files <- NULL
for (row in seq_len(NROW(matches))){
  if (matches[row] > 0){
    # pull the matched file name out of this line of the listing
    mat.files <- c(mat.files, substr(source.listing[row],
                                     matches[row],
                                     matches[row] + attr(matches, "match.length")[row] - 1))
  }
  # (lines with no match are skipped)
}
# make a directory to dump the files into
dest.file.path = paste(Local.URL.Base,"/",
date.path,
sep = "")
dir.create(dest.file.path,
recursive = TRUE)
# download each file we found (seq_len() handles the case of no matches),
# using binary mode so the .mat files are not corrupted
for (row in seq_len(NROW(mat.files))){
  download.file(url = paste(NREL.URL.Base, "/",
                            date.path,
                            mat.files[row],
                            sep = ""),
                destfile = paste(Local.URL.Base, "/",
                                 date.path,
                                 mat.files[row],
                                 sep = ""),
                mode = "wb")
}
} # end of url.exists loop
} # end of the day loop
} # end of the month loop
} # end of the year loop
[/code]
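If the regular expression feels fragile, an alternative is to let an HTML parser pull the links out of the listing for you. Here’s a minimal sketch using getHTMLLinks() from the XML package; it assumes the XML package is installed and reuses the NREL.URL.Base defined above, with one example date hard-coded.

[code]# sketch: scrape one day's listing by parsing the HTML links
# rather than matching file names with a regular expression
require(RCurl)
require(XML)
# fetch the raw HTML of a single day's directory listing
# (example date; in practice this would go inside the date loops above)
listing.html <- getURL(paste(NREL.URL.Base, "/2012/01/01/", sep = ""))
# pull every href out of the page...
all.links <- getHTMLLinks(listing.html)
# ...and keep only the .mat files
mat.files <- all.links[grepl("\\.mat$", all.links)]
[/code]

Either way you end up with the same mat.files vector, so the download loop stays the same.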