Crombie Tools
dumpfilelist.sh
Go to the documentation of this file.
#!/bin/bash

##
# @file dumpfilelist.sh
#
# This script creates the input files for running slimmers over EOS,
# files in Phedex, or other local directories.
#
# @todo Clean this up when not using multiple EOS directories
#
# @author Daniel Abercrombie <dabercro@mit.edu>
#

# First positional argument selects the input source: the literal "eos" turns
# on the EOS branches of this script; any other value (or none) selects the
# non-EOS branches.
isEOS=$1

# Stop early if either output location is unset or empty. Without this the
# clean-up globs below would expand relative to "/".
: "${CrombieFullDir:?CrombieFullDir must be set}"
: "${CrombieTempDir:?CrombieTempDir must be set}"

# Create the directory that holds the output of the slimming.
if [[ ! -d "$CrombieFullDir" ]]; then
  mkdir -p -- "$CrombieFullDir"
fi

# Create also a temporary directory to hold the lxplus output directly.
if [[ ! -d "$CrombieTempDir" ]]; then
  mkdir -p -- "$CrombieTempDir"
else
  # Remove the .txt files left over from an earlier run. Errors are silenced
  # on purpose: rm complains when the glob matches nothing.
  rm -- "$CrombieTempDir"/*.txt 2> /dev/null

  # User can specify a fresh lxbatch run, which clears out the temporary
  # .root files from before.
  if [[ "$fresh" == "fresh" ]]; then
    rm -- "$CrombieTempDir"/*.root 2> /dev/null
  fi
fi

# Check to see if we will be using multi EOS.
#
# CrombieEosDir may name a regular file; in that case the file's contents
# replace the variable (a list of EOS directories) and usingMultiEOS is set
# to 1. Otherwise CrombieEosDir is left alone and usingMultiEOS is 0.
#
# The operand is quoted so that an unset or empty CrombieEosDir counts as
# "not a file". Unquoted, the test collapses to `[ -f ]`, which is true, and
# the following read of the file would wait on standard input.
if [[ -f "$CrombieEosDir" ]]; then
  CrombieEosDir=$(< "$CrombieEosDir")   # Set list of EOS directories
  usingMultiEOS=1
else
  usingMultiEOS=0
fi

# There are now two instances of EOS that we can store Nero ntuples on.
# Each has its own eos.select client.
eosCMS=/afs/cern.ch/project/eos/installation/0.3.84-aquamarine/bin/eos.select
eosUSER=/afs/cern.ch/project/eos/installation/0.3.84-aquamarine.user/bin/eos.select

# Make a log directory (relative to the current directory), but only if
# running on EOS and only if it is not there yet.
logDir=bout
if [[ ! -d "$logDir" && "$isEOS" == "eos" ]]; then
  echo "Making log output directory as $logDir"
  mkdir -- "$logDir"
fi

# Counts how often the EOS instance has been switched:
# % 2 == 0 will be CMS, % 2 == 1 will be USER
trackEOS=0
eosCommand=$eosCMS   # Start with the CMS instance

# Build the RunOnList: the list of samples that the rest of the script
# turns into job configuration files. It is written to a time-stamped file
# in $CrombieTempDir.
#
# Reads:  isEOS, CrombieTempDir, CrombieEosDir, CrombieDirList,
#         CrombieDatasets, CrombieRegDir, eosCommand, eosCMS, eosUSER, trackEOS
# Writes: RunOnList, the file it names, eosCommand, trackEOS, and (non-EOS
#         only) usingMultiEOS
RunOnList=$CrombieTempDir/RunOnList_$(date +%y%m%d%H%M).txt

# Succeeds when the first argument is equal to one of the remaining ones.
_InWordList () {
  local wanted=$1
  local word
  shift
  for word in "$@"; do
    if [[ "$word" == "$wanted" ]]; then
      return 0
    fi
  done
  return 1
}

if [[ "$isEOS" == "eos" ]]; then
  > "$RunOnList"

  # Split the EOS directory list on whitespace, without glob expansion.
  eosarr=()
  read -r -d '' -a eosarr <<< "$CrombieEosDir"

  # Whitespace-separated names from the DirList (sample list), read once
  # instead of once per directory. Stays empty when no DirList is set or the
  # file cannot be read.
  dirListWords=()
  if [[ -n "$CrombieDirList" ]]; then
    read -r -d '' -a dirListWords < "$CrombieDirList"
  fi

  for line in "${eosarr[@]}"; do
    # List the directory with the EOS instance currently selected.
    dirContent=()
    read -r -d '' -a dirContent < <("$eosCommand" ls "$line")

    if (( ${#dirContent[@]} == 0 )); then
      # Nothing came back: switch to the other EOS instance, record the
      # switch in the RunOnList, and try once more.
      trackEOS=$((trackEOS + 1))
      if (( trackEOS % 2 == 1 )); then
        eosCommand=$eosUSER
        echo "eoshost=eosuser.cern.ch" >> "$RunOnList"
      else
        eosCommand=$eosCMS
        echo "eoshost=eoscms.cern.ch" >> "$RunOnList"
      fi

      dirContent=()
      read -r -d '' -a dirContent < <("$eosCommand" ls "$line")
      if (( ${#dirContent[@]} == 0 )); then
        echo "Can't find $line in either instance of EOS..." >&2
        echo "Check configuration." >&2
        exit 1
      fi
    fi

    printf '%s\n' "eosdir=$line" >> "$RunOnList"

    for dir in "${dirContent[@]}"; do
      # If a DirList (sample list) is set, make sure the proposed directory
      # is present in it; if not, don't append it to the RunOnList.
      if [[ -n "$CrombieDirList" ]] && ! _InWordList "$dir" "${dirListWords[@]}"; then
        continue
      fi

      # Do a quick check to make sure we didn't pick up the sample in another
      # location already. Every line of the RunOnList is a single word, so a
      # whole-line fixed-string match is the same as comparing word by word.
      if ! grep -Fxq -- "$dir" "$RunOnList"; then
        printf '%s\n' "$dir" >> "$RunOnList"   # Add to the RunOnList
      fi
    done
  done

else   # If we're not using EOS
  usingMultiEOS=0

  # The operands below are quoted so that an unset variable means "no such
  # file/directory" instead of collapsing the test to a one-argument form.
  if [[ -f "$CrombieDatasets" ]]; then
    # A list of datasets was given: it is the RunOnList.
    cp -- "$CrombieDatasets" "$RunOnList"
  else
    if [[ ! -d "$CrombieRegDir" ]]; then
      echo "$CrombieRegDir does not seem to exist. Maybe needs mounting." >&2
      exit 1
    fi

    if [[ -z "$CrombieDirList" ]]; then
      ls -- "$CrombieRegDir" > "$RunOnList"      # Dump the directory contents
    else
      cat -- "$CrombieDirList" > "$RunOnList"    # Or just use the set directories
    fi
  fi
fi

# Start with an empty hadd instruction file; the loop over the RunOnList in
# this script appends to it.
> "$haddFile"

ranOnFile=0   # Not referenced again in this file; kept as-is.

lastDir=''    # Sample name of the previous RunOnList entry
count=0       # Running index used in job configuration file names

# Default EOS host. With multiple EOS directories, also go back to the CMS
# client. [[ ]] treats an empty usingMultiEOS as 0 rather than raising an
# "integer expression expected" error.
eoshost=eoscms.cern.ch
if [[ "$usingMultiEOS" -eq 1 ]]; then
  eosCommand=$eosCMS
fi

##
# Start a new job configuration file once the current one is full.
#
# Does nothing unless fileInCount equals CrombieFilesPerJob. When it does:
#   - fileInCount is reset to 0,
#   - currentConfig is set to
#     $CrombieTempDir/${CrombieFileBase}_${reasonableName}_${count}.txt,
#   - count is incremented,
#   - the file named by currentConfig is created or truncated.
#
# Takes no arguments; works entirely on the global variables named above.
#
_CheckCount () {
  if [ "$fileInCount" -eq "$CrombieFilesPerJob" ]; then
    fileInCount=0
    currentConfig="${CrombieTempDir}/${CrombieFileBase}_${reasonableName}_${count}.txt"
    count=$((count + 1))
    # Quoted so that a path containing whitespace is still a single target.
    > "$currentConfig"
  fi
}

# Walk the RunOnList and write the job configuration files.
#
# Each whitespace-separated word of the RunOnList is one of:
#   #...           a commented-out entry, skipped
#   eoshost=HOST   switch the EOS host/client for the entries that follow
#   eosdir=DIR     switch the EOS base directory for the entries that follow
#   anything else  a sample (directory or dataset name) to collect files from
#
# For every sample one line is appended to $haddFile, and the sample's input
# files are spread over job configuration files opened by _CheckCount.
#
# The words are read into an array first, so they are split exactly as before
# but are no longer subject to glob expansion.
runOnWords=()
read -r -d '' -a runOnWords < "$RunOnList"

for dir in "${runOnWords[@]}"; do

  if [[ "${dir:0:1}" == "#" ]]; then   # Skip commented directories
    continue
  fi

  if [[ "${dir%%=*}" == "eoshost" ]]; then   # Can change eoshost
    eoshost=${dir##*=}

    # Pick the client that matches the first label of the host name.
    case "${eoshost%%.*}" in
      eoscms)
        eosCommand=$eosCMS
        ;;
      eosuser)
        eosCommand=$eosUSER
        ;;
      *)
        echo "Bad host found in config: $eoshost" >&2
        echo "Please check that." >&2
        exit 1
        ;;
    esac

    continue

  elif [[ "${dir%%=*}" == "eosdir" ]]; then   # Can change eosdir
    CrombieEosDir=${dir##*=}
    continue
  fi

  # Pretend the current configuration is full, so that _CheckCount opens a
  # new one for the first file of this sample.
  fileInCount=$CrombieFilesPerJob

  # Sample name: first path component, cut at the first '+'.
  stripleading="${dir#/}"
  reasonableName="${stripleading%%/*}"
  reasonableName="${reasonableName%%+*}"   # For SubMIT-style dataset names

  if [[ "$reasonableName" != "$lastDir" ]]; then   # Build the file that hadds everything
    count=0
    lastDir=$reasonableName
    # One line per sample: "<merged output> <pattern of the partial outputs>".
    # The pattern is written literally, not expanded here.
    printf '%s %s\n' \
      "$CrombieFullDir/${CrombieFileBase}_$reasonableName.root" \
      "$CrombieTempDir/${CrombieFileBase}_${reasonableName}_[0-9]*.root" >> "$haddFile"
  fi

  if [[ "$isEOS" == "eos" ]]; then   # Find all the root files in EOS

    while IFS= read -r inFile || [[ -n "$inFile" ]]; do

      # Skip pilot files and anything that is not a .root file.
      if [[ "${inFile##*_}" == "pilot.root" || "${inFile##*.}" != "root" ]]; then
        continue
      elif [[ "$inFile" == */failed/* ]]; then
        echo "$inFile"   # Show which file it was
        echo "Found a failed job. Resubmit that. I'm skipping for now."
        continue
      fi

      _CheckCount

      echo "root://$eoshost/$inFile" >> "$currentConfig"
      fileInCount=$((fileInCount + 1))

    done < <("$eosCommand" find "$CrombieEosDir/$dir")

  elif [[ -f "$CrombieDatasets" ]]; then
    # If we have a list of datasets, then dump the file names from Phedex.
    # The operand is quoted: unquoted and empty, the test is always true and
    # the local-directory branch below can never be reached.

    # NOTE(review): the cache file is keyed by reasonableName only, so two
    # datasets sharing the same first path component reuse one cached JSON.
    # Confirm that this is intended.
    outputJSON=$CrombieTempDir/$reasonableName.json

    if [[ ! -f "$outputJSON" ]]; then
      # NOTE(review): certificate checking is switched off for this request.
      if ! wget --no-check-certificate -O "$outputJSON" "https://cmsweb.cern.ch/phedex/datasvc/json/prod/data?dataset=$dir"; then
        # wget -O creates the output file even when the download fails;
        # remove it so a later run does not mistake it for a valid cache.
        rm -f -- "$outputJSON"
        echo "Failed to download the file list for $dir" >&2
        continue
      fi
    fi

    while IFS= read -r inFile || [[ -n "$inFile" ]]; do

      if [[ -z "$inFile" ]]; then
        continue
      fi

      _CheckCount

      echo "root://$CrombieRedirector/$inFile" >> "$currentConfig"
      fileInCount=$((fileInCount + 1))

    done < <(jq -r '.phedex.dbs|.[].dataset|.[].block|.[].file|.[].lfn' "$outputJSON" | sort -u)

  else   # Otherwise, just find all of the .root files in a directory

    indexFiles=0   # Numbers the output files within this sample

    while IFS= read -r inFile || [[ -n "$inFile" ]]; do

      if [[ "${inFile##*_}" == "pilot.root" ]]; then
        continue
      fi

      _CheckCount

      # Line format: "<input file> <output file>". count has already been
      # advanced by _CheckCount when the current configuration was opened.
      echo "$inFile $CrombieTempDir/TerminalRunning/${CrombieFileBase}_${reasonableName}_${count}_$indexFiles.root" >> "$currentConfig"
      fileInCount=$((fileInCount + 1))
      indexFiles=$((indexFiles + 1))

    done < <(find "$CrombieRegDir/$dir" -name '*.root')

  fi

done

# Keep a copy of the RunOnList next to the final output. Operands are quoted
# so that paths containing whitespace are passed as single arguments.
cp -- "$RunOnList" "$CrombieFullDir/."

exit 0