/*
 * PixivManager -- runs a whole tag-collection pass from its constructor.
 *
 * NOTE(review): the text of this file appears to be damaged. Line breaks have
 * been collapsed, so every "//" comment now swallows the code that follows it
 * on the same physical line, and text that sat between a less-than sign and a
 * later greater-than sign seems to have been removed (generic type arguments,
 * several loop conditions, the start of at least one block comment, and the
 * header of AttemptDownload are missing). The code is left untouched below;
 * restore it from an undamaged copy before compiling or refactoring.
 *
 * Collaborators: imageTag and utils are referenced throughout but are not
 * declared in the visible text -- presumably sibling classes with static
 * members; confirm against the rest of the project.
 *
 * What the visible code does:
 *
 * Constructor PixivManager()
 *   - Deletes the regular files inside "downloadedData", deletes
 *     "skippedItems.txt" and "TAG_DATA.txt", and (re)creates the folder.
 *   - Opens "TAG_DATA.txt" in append mode for the per-image tag lines.
 *   - Joins ids from imageTag.pixiv_image_list with commas and passes them to
 *     utils.downloadFileFromUrl as a request.php?images=... URL, saving the
 *     response to "temp_req.html". The batch-size loop header is missing.
 *   - Calls AttemptDownload(bwOutput, id, true) for every id in the list.
 *   - Retry phase: MAXATTEMPTS is 3 and each round sleeps 10000 ms.
 *     NOTE(review): the round copies imageTag.pixiv_retry_list into retryList,
 *     clears pixiv_retry_list, and then iterates pixiv_retry_list (the list it
 *     just cleared) instead of retryList, so as written no retry would run.
 *     This looks unintended -- confirm against the undamaged source.
 *     NOTE(review): InterruptedException is only printed; the interrupt flag
 *     is not restored.
 *   - Closes the writers inside the try body, so an IOException thrown
 *     earlier would skip the close calls.
 *   - Sorts imageTag.tagCounter with sortByValues and writes "key - value"
 *     lines to "SORTED_TAGS.txt" and bare keys to "RAW_TAGS.txt".
 *   - If imageTag.tag_whitelist is empty it prints a notice; otherwise it
 *     prints "Tagging Images..." and starts a loop whose body is missing.
 *
 * AttemptDownload(bwOutput, s, addToRetryListOnFail) -- header not visible;
 * the parameter names are taken from the call sites and the body.
 *   - Reads JSON through utils.readJsonFromFile("finaltemp") and walks
 *     illust -> s -> tags -> tags to get the tag array for image id s.
 *   - For each tag it prefers ENTag, then romaji (the full selection logic is
 *     partly missing), normalises the result through ConvertTag, and, when the
 *     whitelist is empty or contains the tag, appends it to
 *     imageTag.taglist[s] and increments imageTag.tagCounter[tag].
 *   - Appends one line per image to bwOutput, made of the id, a colon and the
 *     collected tag list wrapped in angle brackets.
 *   - In the visible else branch it prints a "will retry" message and adds s
 *     to imageTag.pixiv_retry_list when addToRetryListOnFail is true.
 *   - On FileNotFoundException it logs s to "skippedItems.txt" via
 *     utils.logToFile; any other IOException is printed with printStackTrace.
 *
 * ConvertTag(insertedTag)
 *   - Replaces every ":" with "-", then looks the trimmed, lower-cased tag up
 *     in imageTag.subtaglist. Returns the mapped value when present, otherwise
 *     the colon-replaced tag (not trimmed or lower-cased by this method).
 *
 * sortByValues(map)
 *   - Copies map into a TreeMap whose comparator orders keys by descending
 *     value and returns 1 instead of 0 on ties, so keys with equal values are
 *     all kept rather than collapsed.
 *     NOTE(review): because the comparator never returns 0, key lookups such
 *     as get or containsKey on the returned map will not find entries; the
 *     visible caller only iterates entrySet. The generic signature is missing
 *     from the visible text.
 */
import java.io.BufferedWriter; import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import org.apache.commons.imaging.ImageReadException; import org.json.JSONArray; import org.json.JSONObject; public class PixivManager { public PixivManager() { File folder = new File("downloadedData"); if (folder.exists()) { for (File fff : folder.listFiles()) { if (fff.isFile()) { fff.delete(); } } } File skippedItems = new File("skippedItems.txt"); skippedItems.delete(); folder.mkdirs(); File outputTest = new File("TAG_DATA.txt"); outputTest.delete(); FileWriter fwOutput,fwOutput2; BufferedWriter bwOutput,bwOutput2; try { fwOutput = new FileWriter(outputTest,true); bwOutput = new BufferedWriter(fwOutput); int imageCounter=0; while (imageCounter=imageTag.pixiv_image_list.size()) { break; } if (i!=0) { downloadData.append(","); } downloadData.append(imageTag.pixiv_image_list.get(imageCounter)); imageCounter++; } System.out.println("Sending request to server to download "+downloadData.toString()); utils.downloadFileFromUrl("http://45.33.13.215/crawler/request.php?images="+downloadData.toString(), "temp_req.html"); } for (String s : imageTag.pixiv_image_list) { AttemptDownload(bwOutput, s, true); } int retryAttempts=0; final int MAXATTEMPTS = 3; while (retryAttempts0) { List retryList = new ArrayList(); retryList.addAll(imageTag.pixiv_retry_list); imageTag.pixiv_retry_list.clear(); try { Thread.sleep(10000); } catch (InterruptedException e) { e.printStackTrace(); } for (String s : imageTag.pixiv_retry_list) { System.out.println(" Retry Attempt Number "+(retryAttempts+1)+"..."); AttemptDownload(bwOutput, s, (retryAttempts"); }*/ bwOutput.close(); 
fwOutput.close(); } catch (IOException e1) { e1.printStackTrace(); } Map sorted = sortByValues(imageTag.tagCounter); Set s = sorted.entrySet(); Iterator i = s.iterator(); File tagFile = new File("SORTED_TAGS.txt"); File tagFile2 = new File("RAW_TAGS.txt"); FileWriter fw,fw2; try { fw = new FileWriter(tagFile); fw2 = new FileWriter(tagFile2); BufferedWriter bw = new BufferedWriter(fw); BufferedWriter bw2 = new BufferedWriter(fw2); while (i.hasNext()) { Map.Entry m = (Map.Entry)i.next(); String key = (String)m.getKey(); Integer value = (Integer)m.getValue(); bw.write(key+" - "+value); bw.newLine(); bw2.write(key); bw2.newLine(); } bw.close(); bw2.close(); fw.close(); fw2.close(); } catch (IOException e1) { e1.printStackTrace(); } if (imageTag.tag_whitelist.size()==0) { System.out.println("whitelist.txt not found! No tagging will be done this time. TAG_DATA.txt populated."); } else { System.out.println("Tagging Images..."); for (int j=0;j")+"///"+cutpos); if (cutpos")+3)); System.out.println(data[scriptEndLine].substring(cutpos,data[scriptEndLine].indexOf("}}}'>")+3)); } bw.close(); fw.close(); } catch (IOException e) { e.printStackTrace(); } JSONObject jsonData = utils.readJsonFromFile("finaltemp"); //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload")))); //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload").getJSONObject("illust")))); JSONArray tagsArray = jsonData.getJSONObject("illust").getJSONObject(s).getJSONObject("tags").getJSONArray("tags"); for (int i=0;i0) { insertedTag = ENTag; tagSubmitted=true; } else if (romaji.length()>0){ insertedTag = romaji; tagSubmitted=true; } //insertedTag is the tag that will be used for the image. 
insertedTag = ConvertTag(insertedTag.trim().toLowerCase()); if (tagSubmitted) { if (imageTag.tag_whitelist.size()==0 || imageTag.tag_whitelist.containsKey(insertedTag.trim().toLowerCase())) { if (imageTag.taglist.containsKey(s)) { List tags = imageTag.taglist.get(s); tags.add(insertedTag); imageTag.taglist.put(s, tags); } else { List tags = new ArrayList(); tags.add(insertedTag); imageTag.taglist.put(s,tags); } if (imageTag.tagCounter.containsKey(insertedTag)) { imageTag.tagCounter.put(insertedTag,imageTag.tagCounter.get(insertedTag)+1); } else { imageTag.tagCounter.put(insertedTag,1); } } } } String taglist = s+": <"+imageTag.taglist.get(s)+">"; //System.out.println(taglist); bwOutput.append(taglist); bwOutput.newLine(); //jsonData.getJSONObject("preload").getJSONObject("illust").getJSONObject(s).getJSONObject("tags"); }/* else { System.out.println("Skipping image "+s+" because webpage cannot be found."); utils.logToFile(s+"\n", "skippedItems.txt"); }*/ } else { System.out.println("Skipping image "+s+" because the server couldn't find it. 
Will retry it later..."); if (addToRetryListOnFail) { imageTag.pixiv_retry_list.add(s); } //System.out.println("Skipping image "+s+" because it has already been processed."); } } } catch (IOException e) { if (e instanceof FileNotFoundException) { System.out.println("Skipping image "+s+" because webpage cannot be found."); utils.logToFile(s, "skippedItems.txt"); } else { e.printStackTrace(); } } /*org.apache.commons.io.FileUtils.copyURLToFile(new URL( url ),temp);*/ } private String ConvertTag(String insertedTag) { insertedTag = insertedTag.replaceAll(":", "-"); if (imageTag.subtaglist.containsKey(insertedTag.trim().toLowerCase())) { System.out.println(" Converting subtag "+insertedTag.trim().toLowerCase()+"->"+imageTag.subtaglist.get(insertedTag.trim().toLowerCase())); return imageTag.subtaglist.get(insertedTag.trim().toLowerCase()); } return insertedTag; } public static > Map sortByValues(final Map map) { Comparator valueComparator = new Comparator() { public int compare(K k1, K k2) { int compare = -map.get(k1).compareTo(map.get(k2)); if (compare == 0) return 1; else return compare; } }; Map sortedByValues = new TreeMap(valueComparator); sortedByValues.putAll(map); return sortedByValues; } }