diff --git a/Tagger/Tagger.jar b/Tagger/Tagger.jar index 590476f..77d555b 100644 Binary files a/Tagger/Tagger.jar and b/Tagger/Tagger.jar differ diff --git a/Tagger/src/PixivManager.java b/Tagger/src/PixivManager.java index 436f4d1..2ca1d4b 100644 --- a/Tagger/src/PixivManager.java +++ b/Tagger/src/PixivManager.java @@ -1,5 +1,6 @@ import java.io.BufferedWriter; import java.io.File; +import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.net.URL; @@ -28,6 +29,8 @@ public class PixivManager { } } } + File skippedItems = new File("skippedItems.txt"); + skippedItems.delete(); folder.mkdirs(); File outputTest = new File("TAG_DATA.txt"); FileWriter fwOutput; @@ -42,104 +45,114 @@ public class PixivManager { if (!new File("downloadedData/temp"+s+".html").exists()) { System.out.println("Starting download of "+url+" ..."); utils.downloadFileFromUrl(url, "downloadedData/temp"+s+".html"); - String[] data = utils.readFromFile("downloadedData/temp"+s+".html"); - int scriptEndLine = 0; - while (scriptEndLine")+"///"+cutpos); - if (cutpos")+3)); - System.out.println(data[scriptEndLine].substring(cutpos,data[scriptEndLine].indexOf("}}}'>")+3)); + if (scriptEndLine==data.length) { + System.out.println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"); + System.out.println(" IMAGE "+s+" FAILED TO PARSE CORRECTLY! Something is messed up about the file!!"); + System.out.println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"); } - bw.close(); - fw.close(); - } catch (IOException e) { - e.printStackTrace(); - } - JSONObject jsonData = utils.readJsonFromFile("finaltemp"); - //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload")))); - //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload").getJSONObject("illust")))); - JSONArray tagsArray = jsonData.getJSONObject("illust").getJSONObject(s).getJSONObject("tags").getJSONArray("tags"); - for (int i=0;i")+"///"+cutpos); + if (cutpos")+3)); + System.out.println(data[scriptEndLine].substring(cutpos,data[scriptEndLine].indexOf("}}}'>")+3)); } - } else - if (tag.has("tag") && /*romaji.length()==0 &&*/ !tag.getString("tag").matches(".*[ぁ-んァ-ン一-龯]")) { - hasEnglishTag=true; - ENTag = tag.getString("tag"); - } - - if (ENTag.replaceAll("\\?", "").trim().length()==0) { - ENTag=""; - hasEnglishTag=false; + bw.close(); + fw.close(); + } catch (IOException e) { + e.printStackTrace(); } - boolean tagSubmitted=false; - String insertedTag=""; - if (hasEnglishTag && ENTag.length()>0) { - insertedTag = ENTag; - tagSubmitted=true; - } /*else - if (romaji.length()>0){ - insertedTag = romaji; - tagSubmitted=true; - }*/ - if (tagSubmitted) { - if (imageTag.tag_whitelist.size()==0 || imageTag.tag_whitelist.containsKey(insertedTag.toLowerCase())) { - if (imageTag.taglist.containsKey(s)) { - List tags = imageTag.taglist.get(s); - tags.add(insertedTag); - imageTag.taglist.put(s, tags); - } else { - List tags = new ArrayList(); - tags.add(insertedTag); - imageTag.taglist.put(s,tags); + JSONObject jsonData = utils.readJsonFromFile("finaltemp"); + //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload")))); + //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload").getJSONObject("illust")))); + JSONArray tagsArray = jsonData.getJSONObject("illust").getJSONObject(s).getJSONObject("tags").getJSONArray("tags"); + for (int i=0;i0) { + insertedTag = ENTag; + tagSubmitted=true; + } /*else + if (romaji.length()>0){ + insertedTag = romaji; + tagSubmitted=true; + }*/ + if (tagSubmitted) { + if (imageTag.tag_whitelist.size()==0 || imageTag.tag_whitelist.containsKey(insertedTag.toLowerCase())) { + if (imageTag.taglist.containsKey(s)) { + List tags = imageTag.taglist.get(s); + tags.add(insertedTag); + imageTag.taglist.put(s, tags); + } else { + List tags = new ArrayList(); + tags.add(insertedTag); + imageTag.taglist.put(s,tags); + } + if (imageTag.tagCounter.containsKey(insertedTag)) { + imageTag.tagCounter.put(insertedTag,imageTag.tagCounter.get(insertedTag)+1); + } else { + imageTag.tagCounter.put(insertedTag,1); + } } } } + String taglist = s+": <"+imageTag.taglist.get(s)+">"; + System.out.println(taglist); + bwOutput.append(taglist); + bwOutput.newLine(); + //jsonData.getJSONObject("preload").getJSONObject("illust").getJSONObject(s).getJSONObject("tags"); + } else { + System.out.println("Skipping image "+s+" because webpage cannot be found."); + utils.logToFile(s+"\n", "skippedItems.txt"); } - String taglist = s+": <"+imageTag.taglist.get(s)+">"; - System.out.println(taglist); - bwOutput.append(taglist); - bwOutput.newLine(); - //jsonData.getJSONObject("preload").getJSONObject("illust").getJSONObject(s).getJSONObject("tags"); } else { System.out.println("Skipping image "+s+" because it has already been processed."); } } catch (IOException e) { - e.printStackTrace(); + if (e instanceof FileNotFoundException) { + System.out.println("Skipping image "+s+" because webpage cannot be found."); + utils.logToFile(s, "skippedItems.txt"); + } else { + e.printStackTrace(); + } } /*org.apache.commons.io.FileUtils.copyURLToFile(new URL( url diff --git a/Tagger/src/utils.java b/Tagger/src/utils.java index 8bc9012..e5c452d 100644 --- a/Tagger/src/utils.java +++ b/Tagger/src/utils.java @@ -1,6 +1,7 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; @@ -256,7 +257,7 @@ public class utils { return lastSlashpos; } - public static void downloadFileFromUrl(String url, String file) throws IOException, JSONException { + public static void downloadFileFromUrl(String url, String file) throws IOException, JSONException, FileNotFoundException { String temp = url.substring(0,LastSlash(url)); String temp2 = url.substring(LastSlash(url)); @@ -301,11 +302,7 @@ public class utils { return json; } - /*public static void logToFile(String message, String filename) { - logToFile(message,filename,false); - }*/ - - /*public static void logToFile(String message, String filename, boolean outputToChatLog) { + public static void logToFile(String message, String filename) { File file = new File(filename); try { @@ -322,10 +319,7 @@ public class utils { } catch (IOException e) { e.printStackTrace(); } - if (outputToChatLog && sigIRC.chatlogmodule_enabled) { - ChatLogMessage.importMessages(message); - } - }*/ + } public static void writetoFile(String[] data, String filename) { File file = new File(filename);